Skip to content

Commit 0967110

Browse files
authored
[Minor] Update the tqdm bar for parallel sampling (#14571)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent fb0acb6 commit 0967110

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

vllm/entrypoints/llm.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1384,8 +1384,9 @@ def _run_engine(
13841384
if use_tqdm:
13851385
if isinstance(output, RequestOutput):
13861386
# Calculate tokens only for RequestOutput
1387+
n = len(output.outputs)
13871388
assert output.prompt_token_ids is not None
1388-
total_in_toks += len(output.prompt_token_ids)
1389+
total_in_toks += len(output.prompt_token_ids) * n
13891390
in_spd = total_in_toks / pbar.format_dict["elapsed"]
13901391
total_out_toks += sum(
13911392
len(stp.token_ids) for stp in output.outputs)
@@ -1394,7 +1395,7 @@ def _run_engine(
13941395
pbar.postfix = (
13951396
f"est. speed input: {in_spd:.2f} toks/s, "
13961397
f"output: {out_spd:.2f} toks/s")
1397-
pbar.update(len(output.outputs))
1398+
pbar.update(n)
13981399
else:
13991400
pbar.update(1)
14001401

0 commit comments

Comments
 (0)