diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index e26f97d816ae..759d69293a32 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1721,6 +1721,10 @@ def _dummy_sampler_run( self, hidden_states: torch.Tensor, ) -> torch.Tensor: + # The dummy hidden states may contain special values, + # like `inf` or `nan`. + # To avoid breaking the sampler, we use a random tensor here instead. + hidden_states = torch.rand_like(hidden_states) logits = self.model.compute_logits(hidden_states, None) num_reqs = logits.size(0)