Skip to content

Commit 3452bcf

Browse files
committed
[bugfix] fix wrong slice of hidden_states before all-gather
Signed-off-by: QiuChunshuo <[email protected]>
1 parent 5122288 commit 3452bcf

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2630,7 +2630,7 @@ def execute_model(
26302630
# NOTE(qcs): For PCP, we pad num_scheduled_tokens_np but
26312631
# do not update total_num_scheduled_tokens in scheduler_output
26322632
num_input_tokens = self._get_num_input_tokens(
2633-
sum(num_scheduled_tokens_np)
2633+
num_scheduled_tokens_np.sum()
26342634
)
26352635
else:
26362636
num_input_tokens = self._get_num_input_tokens(
@@ -2706,7 +2706,7 @@ def execute_model(
27062706
# NOTE we must `slice` hidden_states because pcp_allgather_restore_idx
27072707
# ignores the padding from CUDA Graph.
27082708
hidden_states = get_pcp_group().all_gather(
2709-
hidden_states[:num_scheduled_tokens],
2709+
hidden_states[:num_input_tokens],
27102710
0,
27112711
)
27122712
hidden_states = torch.index_select(

0 commit comments

Comments
 (0)