File tree Expand file tree Collapse file tree 2 files changed +12
-9
lines changed Expand file tree Collapse file tree 2 files changed +12
-9
lines changed Original file line number Diff line number Diff line change @@ -92,10 +92,13 @@ def _validate_allowed_token_ids(
9292 return
9393 if params .allowed_token_ids is None :
9494 return
95- if not all (0 <= tid < self .model_config .vocab_size
95+ if params .allowed_token_ids is not None and len (
96+ params .allowed_token_ids ) == 0 :
97+ raise ValueError ("allowed_token_ids is not None and empty!" )
98+ if not all (0 <= tid < self .model_config .get_vocab_size ()
9699 for tid in params .allowed_token_ids ):
97100 raise ValueError (
98- "allowed_token_ids contains out-of-vocab token id" )
101+ "allowed_token_ids contains out-of-vocab token id! " )
99102
100103 def process_inputs (
101104 self ,
Original file line number Diff line number Diff line change @@ -300,17 +300,17 @@ def add_request(
300300 self .has_allowed_token_ids .add (req_id )
301301 if self .allowed_token_ids_mask_cpu_tensor is None :
302302 # Lazy allocation for this tensor, which can be large.
303- self .allowed_token_ids_mask = torch .zeros (self .max_num_reqs ,
304- self .vocab_size ,
305- dtype = torch .bool ,
306- device = self .device )
307- self .allowed_token_ids_mask_cpu_tensor = torch .zeros (
303+ self .allowed_token_ids_mask = torch .ones (self .max_num_reqs ,
304+ self .vocab_size ,
305+ dtype = torch .bool ,
306+ device = self .device )
307+ self .allowed_token_ids_mask_cpu_tensor = torch .ones (
308308 self .max_num_reqs ,
309309 self .vocab_size ,
310310 dtype = torch .bool ,
311311 device = "cpu" )
312312 self .allowed_token_ids_mask_cpu_tensor [req_index ][
313- sampling_params .allowed_token_ids ] = True
313+ sampling_params .allowed_token_ids ] = False
314314
315315 # Add request lora ID
316316 if request .lora_request :
@@ -359,7 +359,7 @@ def remove_request(self, req_id: str) -> Optional[int]:
359359 self .logit_bias [req_index ] = None
360360 self .has_allowed_token_ids .discard (req_id )
361361 if self .allowed_token_ids_mask_cpu_tensor is not None :
362- self .allowed_token_ids_mask_cpu_tensor [req_index ].fill_ (False )
362+ self .allowed_token_ids_mask_cpu_tensor [req_index ].fill_ (True )
363363 return req_index
364364
365365 def swap_states (self , i1 : int , i2 : int ) -> None :
You can’t perform that action at this time.
0 commit comments