Fix TRTLLM attention logging to show both prefill and decode messages

jasonlizhengjian · jasonlizhengjian · commit f808ba9aad52 · 2025-10-01T20:04:25.000Z
Signed-off-by: jasonlizhengjian <jasonlizhengjian@gmail.com>
diff --git a/vllm/utils/flashinfer.py b/vllm/utils/flashinfer.py
@@ -277,15 +277,16 @@ def use_trtllm_attention(
         if is_prefill:
             # Prefill auto-detection
             use_trtllm = (max_seq_len <= 131072 and kv_cache_dtype == "auto")
+            if use_trtllm:
+                logger.warning_once(
+                    "Using TRTLLM prefill attention (auto-detected).")
         else:
             # Decode auto-detection
             use_trtllm = (num_tokens <= 256 and max_seq_len <= 131072
                           and kv_cache_dtype == "auto")
-
-        if use_trtllm:
-            context = "prefill" if is_prefill else "decode"
-            logger.warning_once(
-                f"Using TRTLLM {context} attention (auto-detected).")
+            if use_trtllm:
+                logger.warning_once(
+                    "Using TRTLLM decode attention (auto-detected).")
         return use_trtllm
 
     # Environment variable is set to 1 - respect it