Skip to content

Commit 8ee4ae5

Browse files
Fix TRTLLM attention logging to show both prefill and decode messages
Signed-off-by: jasonlizhengjian <[email protected]>
1 parent c412556 commit 8ee4ae5

File tree

1 file changed

+6
-5
lines changed

1 file changed

+6
-5
lines changed

vllm/utils/flashinfer.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -286,15 +286,16 @@ def use_trtllm_attention(
286286
if is_prefill:
287287
# Prefill auto-detection
288288
use_trtllm = (max_seq_len <= 131072 and kv_cache_dtype == "auto")
289+
if use_trtllm:
290+
logger.warning_once(
291+
"Using TRTLLM prefill attention (auto-detected).")
289292
else:
290293
# Decode auto-detection
291294
use_trtllm = (num_tokens <= 256 and max_seq_len <= 131072
292295
and kv_cache_dtype == "auto")
293-
294-
if use_trtllm:
295-
context = "prefill" if is_prefill else "decode"
296-
logger.warning_once(
297-
f"Using TRTLLM {context} attention (auto-detected).")
296+
if use_trtllm:
297+
logger.warning_once(
298+
"Using TRTLLM decode attention (auto-detected).")
298299
return use_trtllm
299300

300301
# Environment variable is set to 1 - respect it

0 commit comments

Comments
 (0)