kv-cache : add LLAMA_KV_CACHE_DEBUG environment variable (ggml-org#14121)

ggerganov · Minh141120 · commit 0ca0399da3b5 · 2025-07-05T23:24:32.000+07:00
diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp
@@ -156,6 +156,9 @@ llama_kv_cache_unified::llama_kv_cache_unified(
 
     const char * LLAMA_KV_CACHE_DEBUG = getenv("LLAMA_KV_CACHE_DEBUG");
     debug = LLAMA_KV_CACHE_DEBUG ? atoi(LLAMA_KV_CACHE_DEBUG) : 0;
+
+    const char * LLAMA_KV_CACHE_DEBUG = getenv("LLAMA_KV_CACHE_DEBUG");
+    debug = LLAMA_KV_CACHE_DEBUG ? atoi(LLAMA_KV_CACHE_DEBUG) : 0;
 }
 
 void llama_kv_cache_unified::clear(bool data) {
@@ -556,8 +559,11 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
     }
 
     if (debug > 0) {
+        LLAMA_LOG_CONT("\n");
         LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n", __func__, cells.used_max_p1(), cells.get_used(), head, get_size(), n_swa);
 
+        if ((debug == 2 && n_swa > 0) || debug > 2) {
+            std::string ss;
         if ((debug == 2 && n_swa > 0) || debug > 2) {
             std::string ss;
             for (uint32_t i = 0; i < cells.size(); ++i) {
@@ -604,14 +610,17 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
             LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
         }
 
-        for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
+        for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
             if (cells.seq_pos_min(s) < 0) {
                 continue;
             }
 
             LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
         }
     }
+            LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
+        }
+    }
 
     uint32_t n_tested = 0;
 

Original file line number	Diff line number	Diff line change
`@@ -156,6 +156,9 @@ llama_kv_cache_unified::llama_kv_cache_unified(`
`156`	`156`
`157`	`157`	`const char * LLAMA_KV_CACHE_DEBUG = getenv("LLAMA_KV_CACHE_DEBUG");`
`158`	`158`	`debug = LLAMA_KV_CACHE_DEBUG ? atoi(LLAMA_KV_CACHE_DEBUG) : 0;`
	`159`	`+`
	`160`	`+ const char * LLAMA_KV_CACHE_DEBUG = getenv("LLAMA_KV_CACHE_DEBUG");`
	`161`	`+ debug = LLAMA_KV_CACHE_DEBUG ? atoi(LLAMA_KV_CACHE_DEBUG) : 0;`
`159`	`162`	`}`
`160`	`163`
`161`	`164`	`void llama_kv_cache_unified::clear(bool data) {`
`@@ -556,8 +559,11 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {`
`556`	`559`	`}`
`557`	`560`
`558`	`561`	`if (debug > 0) {`
	`562`	`+ LLAMA_LOG_CONT("\n");`
`559`	`563`	`LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n", __func__, cells.used_max_p1(), cells.get_used(), head, get_size(), n_swa);`
`560`	`564`
	`565`	`+ if ((debug == 2 && n_swa > 0) || debug > 2) {`
	`566`	`+ std::string ss;`
`561`	`567`	`if ((debug == 2 && n_swa > 0) || debug > 2) {`
`562`	`568`	`std::string ss;`
`563`	`569`	`for (uint32_t i = 0; i < cells.size(); ++i) {`
`@@ -604,14 +610,17 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {`
`604`	`610`	`LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());`
`605`	`611`	`}`
`606`	`612`
`607`		`- for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {`
	`613`	`+ for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {`
`608`	`614`	`if (cells.seq_pos_min(s) < 0) {`
`609`	`615`	`continue;`
`610`	`616`	`}`
`611`	`617`
`612`	`618`	`LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));`
`613`	`619`	`}`
`614`	`620`	`}`
	`621`	`+ LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));`
	`622`	`+ }`
	`623`	`+ }`
`615`	`624`
`616`	`625`	`uint32_t n_tested = 0;`
`617`	`626`