@@ -127,6 +127,9 @@ llama_kv_cache_unified::llama_kv_cache_unified(
127127 ggml_type_name (type_k), (float )memory_size_k / (1024 .0f * 1024 .0f ),
128128 ggml_type_name (type_v), (float )memory_size_v / (1024 .0f * 1024 .0f ));
129129 }
130+
131+ const char * LLAMA_KV_CACHE_DEBUG = getenv (" LLAMA_KV_CACHE_DEBUG" );
132+ debug = LLAMA_KV_CACHE_DEBUG ? atoi (LLAMA_KV_CACHE_DEBUG) : 0 ;
130133}
131134
132135void llama_kv_cache_unified::clear (bool data) {
@@ -517,36 +520,58 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
517520 return -1 ;
518521 }
519522
520- // #define FIND_SLOT_DEBUG 1
521- # if FIND_SLOT_DEBUG
522- LLAMA_LOG_WARN ( " begin : n = %5d, used = %5d, head = %5d, n_swa = %5d\n " , cells.used_max_p1 (), cells.get_used (), head, n_swa);
523+ if (debug > 0 ) {
524+ LLAMA_LOG_CONT ( " \n " );
525+ LLAMA_LOG_DEBUG ( " %s : n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n " , __func__, cells.used_max_p1 (), cells.get_used (), head, get_size () , n_swa);
523526
524- // for debugging
525- {
526- std::string ss;
527- if (n_swa > 0 ) {
527+ if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
528+ std::string ss;
528529 for (uint32_t i = 0 ; i < cells.size (); ++i) {
529530 if (cells.is_empty (i)) {
530531 ss += ' .' ;
531532 } else {
532533 ss += std::to_string (cells.seq_get (i));
533534 }
534535 if (i%256 == 255 ) {
536+ ss += " *" ;
535537 ss += ' \n ' ;
536538 }
537539 }
540+ LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
538541 }
539- LLAMA_LOG_WARN (" \n %s\n " , ss.c_str ());
540- }
541542
542- for (int s = 0 ; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
543- if (cells.seq_pos_min (s) < 0 ) {
544- continue ;
543+ if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
544+ std::string ss;
545+ for (uint32_t i = 0 ; i < cells.size (); ++i) {
546+ std::string cur;
547+ if (cells.is_empty (i)) {
548+ cur = ' .' ;
549+ } else {
550+ cur = std::to_string (cells.pos_get (i));
551+ }
552+ const int n = cur.size ();
553+ for (int j = 0 ; j < 5 - n; ++j) {
554+ cur += ' ' ;
555+ }
556+ ss += cur;
557+ if (i%256 == 255 ) {
558+ ss += " *" ;
559+ }
560+ if (i%64 == 63 ) {
561+ ss += ' \n ' ;
562+ }
563+ }
564+ LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
545565 }
546566
547- LLAMA_LOG_WARN (" kv_cells: n_swa = %4d, min[%d] = %5d, max[%d] = %5d\n " , n_swa, s, cells.seq_pos_min (s), s, cells.seq_pos_max (s));
567+ for (int s = 0 ; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
568+ if (cells.seq_pos_min (s) < 0 ) {
569+ continue ;
570+ }
571+
572+ LLAMA_LOG_DEBUG (" %s: min[%d] = %5d, max[%d] = %5d\n " , __func__, s, cells.seq_pos_min (s), s, cells.seq_pos_max (s));
573+ }
548574 }
549- #endif
550575
551576 uint32_t n_tested = 0 ;
552577
0 commit comments