Skip to content

Commit f808ba9

Browse files
Fix TRTLLM attention logging to show both prefill and decode messages
Signed-off-by: jasonlizhengjian <[email protected]>
1 parent 6397563 commit f808ba9

File tree

1 file changed

+6 -5 lines changed

1 file changed

+6 -5 lines changed

vllm/utils/flashinfer.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -277,15 +277,16 @@ def use_trtllm_attention(
277277
if is_prefill:
278278
# Prefill auto-detection
279279
use_trtllm = (max_seq_len <= 131072 and kv_cache_dtype == "auto")
280+
if use_trtllm:
281+
logger.warning_once(
282+
"Using TRTLLM prefill attention (auto-detected).")
280283
else:
281284
# Decode auto-detection
282285
use_trtllm = (num_tokens <= 256 and max_seq_len <= 131072
283286
and kv_cache_dtype == "auto")
284-
285-
if use_trtllm:
286-
context = "prefill" if is_prefill else "decode"
287-
logger.warning_once(
288-
f"Using TRTLLM {context} attention (auto-detected).")
287+
if use_trtllm:
288+
logger.warning_once(
289+
"Using TRTLLM decode attention (auto-detected).")
289290
return use_trtllm
290291

291292
# Environment variable is set to 1 - respect it

0 commit comments

Comments
 (0)