Merged
Changes from 3 commits
8 changes: 5 additions & 3 deletions vllm/v1/engine/processor.py
@@ -92,10 +92,12 @@ def _validate_allowed_token_ids(
             return
         if params.allowed_token_ids is None:
             return
-        if not all(0 <= tid < self.model_config.vocab_size
-                   for tid in params.allowed_token_ids):
+        if not params.allowed_token_ids:
+            raise ValueError("allowed_token_ids is not None and empty!")
+        vocab_size = self.model_config.get_vocab_size()
+        if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids):
             raise ValueError(
-                "allowed_token_ids contains out-of-vocab token id")
+                "allowed_token_ids contains out-of-vocab token id!")
 
     def process_inputs(
         self,
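For readers skimming the diff: the new validation rejects an explicitly empty allow-list and range-checks every id against the vocab size. A minimal, self-contained sketch of that behavior (the standalone function and example values are illustrative, not vLLM's actual API):

```python
from typing import Optional


def validate_allowed_token_ids(allowed_token_ids: Optional[list[int]],
                               vocab_size: int) -> None:
    # Mirrors the checks in Processor._validate_allowed_token_ids above.
    if allowed_token_ids is None:
        return  # feature unused; nothing to validate
    if not allowed_token_ids:
        raise ValueError("allowed_token_ids is not None and empty!")
    if not all(0 <= tid < vocab_size for tid in allowed_token_ids):
        raise ValueError("allowed_token_ids contains out-of-vocab token id!")


validate_allowed_token_ids(None, 32000)       # OK: feature not in use
validate_allowed_token_ids([1, 5, 7], 32000)  # OK: all ids in range
# validate_allowed_token_ids([], 32000)       # raises: empty allow-list
# validate_allowed_token_ids([40000], 32000)  # raises: out-of-vocab id
```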
7 changes: 5 additions & 2 deletions vllm/v1/worker/gpu_input_batch.py
@@ -199,6 +199,8 @@ def __init__(
         self.logit_bias: list[Optional[dict[int,
                                             float]]] = [None] * max_num_reqs
         self.has_allowed_token_ids: set[str] = set()
+        # NOTE(lufang): In the mask tensor, a token's entry is False if the
+        # token is allowed, since masked_fill_ sets True entries to -inf.
         self.allowed_token_ids_mask: Optional[torch.Tensor] = None
         self.allowed_token_ids_mask_cpu_tensor: Optional[torch.Tensor] = None
 
@@ -309,8 +311,9 @@ def add_request(
                     self.vocab_size,
                     dtype=torch.bool,
                     device="cpu")
+            self.allowed_token_ids_mask_cpu_tensor[req_index] = True
             self.allowed_token_ids_mask_cpu_tensor[req_index][
-                sampling_params.allowed_token_ids] = True
+                sampling_params.allowed_token_ids] = False
 
         # Add request lora ID
         if request.lora_request:
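The add_request change above is the crux of the fix: a request's mask row now starts out all True (everything disallowed) and only the allowed ids are flipped back to False. A hedged, standalone sketch of that convention and the sampler-side masked_fill_ it feeds (tensor shapes and variable names here are illustrative):

```python
import torch

vocab_size = 8
max_num_reqs = 2

# One row per request; True means the token is NOT allowed.
mask = torch.zeros(max_num_reqs, vocab_size, dtype=torch.bool)

# add_request for slot 0 with allowed_token_ids = [2, 5]:
req_index = 0
mask[req_index] = True            # start with every token disallowed
mask[req_index][[2, 5]] = False   # then re-enable only the allowed ids

# Sampler side: True positions are filled with -inf before sampling.
logits = torch.randn(max_num_reqs, vocab_size)
masked = logits.masked_fill(mask, float("-inf"))
assert torch.isinf(masked[0, 0]).item()      # disallowed token is -inf
assert not torch.isinf(masked[0, 2]).item()  # allowed token survives
```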
@@ -359,7 +362,7 @@ def remove_request(self, req_id: str) -> Optional[int]:
         self.logit_bias[req_index] = None
         self.has_allowed_token_ids.discard(req_id)
         if self.allowed_token_ids_mask_cpu_tensor is not None:
-            self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(False)
+            self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(True)
Member:
@houseroad this should also be reverted, right?

Collaborator (Author):
True. I added more comments to help explain this.

         return req_index
 
     def swap_states(self, i1: int, i2: int) -> None:
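For context on the exchange above, one plausible reading of the reviewer's concern, stated here as an assumption rather than a confirmed bug, is slot reuse: remove_request now leaves the stale row all True, and if the mask is applied batch-wide, a new request that reuses the slot without setting allowed_token_ids would have every logit masked. A hypothetical illustration:

```python
import torch

vocab_size = 4
mask = torch.zeros(2, vocab_size, dtype=torch.bool)

# Request A occupies slot 0 with allowed_token_ids = [1].
mask[0] = True
mask[0][[1]] = False

# remove_request(A): with fill_(True), the stale row stays all-disallowed.
mask[0].fill_(True)

# If slot 0 is reused by a request WITHOUT allowed_token_ids, add_request
# never touches the row, so a batch-wide masked_fill would -inf everything:
logits = torch.zeros(2, vocab_size)
print(logits.masked_fill(mask, float("-inf"))[0])
# tensor([-inf, -inf, -inf, -inf])
```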