5 changes: 3 additions & 2 deletions vllm/inputs/__init__.py
@@ -10,8 +10,9 @@

INPUT_REGISTRY = InputRegistry()
"""
-The global {class}`~InputRegistry` which is used by {class}`~vllm.LLMEngine`
-to dispatch data processing according to the target model.
+The global [`InputRegistry`][vllm.inputs.registry.InputRegistry] which is used
+by [`LLMEngine`][vllm.LLMEngine] to dispatch data processing according to the
+target model.
"""

__all__ = [
68 changes: 39 additions & 29 deletions vllm/inputs/data.py
@@ -80,22 +80,24 @@ class EmbedsPrompt(TypedDict):
"""
Set of possible schemas for a single prompt:

-- A text prompt ({class}`str` or {class}`TextPrompt`)
-- A tokenized prompt ({class}`TokensPrompt`)
-- An embeddings prompt ({class}`EmbedsPrompt`)
+- A text prompt ([`str`][] or [`TextPrompt`][vllm.inputs.data.TextPrompt])
+- A tokenized prompt ([`TokensPrompt`][vllm.inputs.data.TokensPrompt])
+- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt])

Note that "singleton" is as opposed to a data structure
which encapsulates multiple prompts, i.e. of the sort
which may be utilized for encoder/decoder models when
the user desires to express both the encoder & decoder
-prompts explicitly, i.e. {class}`ExplicitEncoderDecoderPrompt`
+prompts explicitly, i.e.
+[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]

-A prompt of type {class}`SingletonPrompt` may be employed
-as (1) input to a decoder-only model, (2) input to
+A prompt of type [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] may be
+employed as (1) input to a decoder-only model, (2) input to
the encoder of an encoder/decoder model, in the scenario
where the decoder-prompt is not specified explicitly, or
(3) as a member of a larger data structure encapsulating
-more than one prompt, i.e. {class}`ExplicitEncoderDecoderPrompt`
+more than one prompt, i.e.
+[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
"""


@@ -126,18 +128,20 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
comprising an explicit encoder prompt and a decoder prompt.

The encoder and decoder prompts, respectively, may be formatted
-according to any of the {class}`SingletonPrompt` schemas,
+according to any of the
+[`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] schemas,
and are not required to have the same schema.

Only the encoder prompt may have multi-modal data. mm_processor_kwargs
should be at the top-level, and should not be set in the encoder/decoder
prompts, since they are agnostic to the encoder/decoder.

-Note that an {class}`ExplicitEncoderDecoderPrompt` may not
-be used as an input to a decoder-only model,
+Note that an
+[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
+may not be used as an input to a decoder-only model,
and that the `encoder_prompt` and `decoder_prompt`
fields of this data structure themselves must be
-{class}`SingletonPrompt` instances.
+[`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] instances.
"""

encoder_prompt: _T1_co
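Reviewer's note: a sketch of the mixed-schema pairing this docstring allows, assuming mm_processor_kwargs may be omitted; the prompt contents are illustrative:

```python
from vllm.inputs.data import (ExplicitEncoderDecoderPrompt, TextPrompt,
                              TokensPrompt)

# The encoder and decoder sides need not use the same singleton schema.
enc_dec_prompt = ExplicitEncoderDecoderPrompt(
    encoder_prompt=TextPrompt(prompt="A document for the encoder side."),
    decoder_prompt=TokensPrompt(prompt_token_ids=[0]),  # e.g. a BOS-only start
)
```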
@@ -152,11 +156,11 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
Set of possible schemas for an LLM input, including
both decoder-only and encoder/decoder input types:

-- A text prompt ({class}`str` or {class}`TextPrompt`)
-- A tokenized prompt ({class}`TokensPrompt`)
-- An embeddings prompt ({class}`EmbedsPrompt`)
+- A text prompt ([`str`][] or [`TextPrompt`][vllm.inputs.data.TextPrompt])
+- A tokenized prompt ([`TokensPrompt`][vllm.inputs.data.TokensPrompt])
+- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt])
- A single data structure containing both an encoder and a decoder prompt
-  ({class}`ExplicitEncoderDecoderPrompt`)
+  ([`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt])
"""


@@ -189,7 +193,8 @@ def token_inputs(
prompt: Optional[str] = None,
cache_salt: Optional[str] = None,
) -> TokenInputs:
"""Construct {class}`TokenInputs` from optional values."""
"""Construct [`TokenInputs`][vllm.inputs.data.TokenInputs] from optional
values."""
inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids)

if prompt is not None:
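Reviewer's note: a usage sketch of this helper; the token ids are invented:

```python
from vllm.inputs.data import token_inputs

inputs = token_inputs(prompt_token_ids=[1, 2, 3], prompt="hi")
assert inputs["type"] == "token"
assert inputs["prompt_token_ids"] == [1, 2, 3]
```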
@@ -221,7 +226,8 @@ def embeds_inputs(
prompt_embeds: torch.Tensor,
cache_salt: Optional[str] = None,
) -> EmbedsInputs:
"""Construct :class:`EmbedsInputs` from optional values."""
"""Construct [`EmbedsInputs`][vllm.inputs.data.EmbedsInputs] from optional
values."""
inputs = EmbedsInputs(type="embeds", prompt_embeds=prompt_embeds)

if cache_salt is not None:
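Reviewer's note: the matching sketch for the embeddings constructor; the tensor shape and cache salt are invented:

```python
import torch

from vllm.inputs.data import embeds_inputs

inputs = embeds_inputs(prompt_embeds=torch.randn(4, 4096), cache_salt="demo")
assert inputs["type"] == "embeds"
```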
@@ -232,19 +238,20 @@

DecoderOnlyInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"]
"""
-The inputs in {class}`~vllm.LLMEngine` before they are
+The inputs in [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] before they are
passed to the model executor.
This specifies the data required for decoder-only models.
"""


class EncoderDecoderInputs(TypedDict):
"""
-The inputs in {class}`~vllm.LLMEngine` before they are
-passed to the model executor.
+The inputs in [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] before they
+are passed to the model executor.

This specifies the required data for encoder-decoder models.
"""

encoder: Union[TokenInputs, "MultiModalInputs"]
"""The inputs for the encoder portion."""

@@ -254,13 +261,13 @@ class EncoderDecoderInputs(TypedDict):

SingletonInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"]
"""
-A processed {class}`SingletonPrompt` which can be passed to
-{class}`vllm.sequence.Sequence`.
+A processed [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] which can be
+passed to [`vllm.sequence.Sequence`][].
"""

ProcessorInputs = Union[DecoderOnlyInputs, EncoderDecoderInputs]
"""
-The inputs to {data}`vllm.inputs.InputProcessor`.
+The outputs from [`vllm.inputs.preprocess.InputPreprocessor`][].
"""

[Inline comment from the PR author: I fixed the incorrect description of ProcessorInputs.]

_T1 = TypeVar("_T1", bound=SingletonPrompt, default=SingletonPrompt)
@@ -277,7 +284,8 @@ def build_explicit_enc_dec_prompt(
return ExplicitEncoderDecoderPrompt(
encoder_prompt=encoder_prompt,
decoder_prompt=decoder_prompt,
-mm_processor_kwargs=mm_processor_kwargs)
+mm_processor_kwargs=mm_processor_kwargs,
+)


def zip_enc_dec_prompts(
Expand All @@ -288,7 +296,8 @@ def zip_enc_dec_prompts(
) -> list[ExplicitEncoderDecoderPrompt[_T1, _T2]]:
"""
Zip encoder and decoder prompts together into a list of
-{class}`ExplicitEncoderDecoderPrompt` instances.
+[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
+instances.

``mm_processor_kwargs`` may also be provided; if a dict is passed, the same
dictionary will be used for every encoder/decoder prompt. If an iterable is
@@ -299,10 +308,11 @@ def zip_enc_dec_prompts(
if isinstance(mm_processor_kwargs, dict):
return [
build_explicit_enc_dec_prompt(
-encoder_prompt, decoder_prompt,
-cast(dict[str, Any], mm_processor_kwargs))
-for (encoder_prompt,
-decoder_prompt) in zip(enc_prompts, dec_prompts)
+encoder_prompt,
+decoder_prompt,
+cast(dict[str, Any], mm_processor_kwargs),
+) for (encoder_prompt,
+decoder_prompt) in zip(enc_prompts, dec_prompts)
]
return [
build_explicit_enc_dec_prompt(encoder_prompt, decoder_prompt,
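Reviewer's note: a sketch of the shared-dict behavior described in the docstring above; the num_crops kwarg is hypothetical:

```python
from vllm.inputs.data import zip_enc_dec_prompts

pairs = zip_enc_dec_prompts(
    ["encoder one", "encoder two"],
    ["decoder one", "decoder two"],
    mm_processor_kwargs={"num_crops": 4},  # one dict, reused for every pair
)
assert pairs[0]["encoder_prompt"] == "encoder one"
assert pairs[1]["mm_processor_kwargs"] == {"num_crops": 4}
```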
8 changes: 4 additions & 4 deletions vllm/inputs/parse.py
@@ -23,13 +23,13 @@ class ParsedTokens(TypedDict):

@overload
def parse_and_batch_prompt(
-prompt: Union[str, list[str]]) -> Sequence[ParsedText]:
+prompt: Union[str, list[str]], ) -> Sequence[ParsedText]:
...


@overload
def parse_and_batch_prompt(
-prompt: Union[list[int], list[list[int]]]) -> Sequence[ParsedTokens]:
+prompt: Union[list[int], list[list[int]]], ) -> Sequence[ParsedTokens]:
...


@@ -86,7 +86,7 @@ class ParsedTokensPrompt(TypedDict):


class ParsedEmbedsPrompt(TypedDict):
-type: Literal['embeds']
+type: Literal["embeds"]
content: EmbedsPrompt


@@ -133,7 +133,7 @@ def parse_singleton_prompt(prompt: SingletonPrompt) -> ParsedSingletonPrompt:


def is_explicit_encoder_decoder_prompt(
-prompt: PromptType) -> TypeIs[ExplicitEncoderDecoderPrompt]:
+prompt: PromptType, ) -> TypeIs[ExplicitEncoderDecoderPrompt]:
return isinstance(prompt, dict) and "encoder_prompt" in prompt


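Reviewer's note: per the body shown above, the type guard's runtime behavior is just a key check, e.g.:

```python
from vllm.inputs.parse import is_explicit_encoder_decoder_prompt

assert is_explicit_encoder_decoder_prompt(
    {"encoder_prompt": "a document", "decoder_prompt": "summary:"})
assert not is_explicit_encoder_decoder_prompt("a plain text prompt")
```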
56 changes: 39 additions & 17 deletions vllm/inputs/preprocess.py
@@ -67,26 +67,26 @@ def get_eos_token_id(self,
return self.tokenizer.get_lora_tokenizer(lora_request).eos_token_id

def get_decoder_start_token_id(self) -> Optional[int]:
-'''
+"""
Obtain the decoder start token id employed by an encoder/decoder
model. Returns None for non-encoder/decoder models or if the
model config is unavailable.
-'''
+"""

if not self.model_config.is_encoder_decoder:
logger.warning_once(
"Using None for decoder start token id because "
"this is not an encoder/decoder model.")
return None

-if (self.model_config is None or self.model_config.hf_config is None):
+if self.model_config is None or self.model_config.hf_config is None:
logger.warning_once(
"Using None for decoder start token id because "
"model config is not available.")
return None

dec_start_token_id = getattr(self.model_config.hf_config,
-'decoder_start_token_id', None)
+"decoder_start_token_id", None)
if dec_start_token_id is None:
logger.warning_once(
"Falling back on <BOS> for decoder start token "
@@ -97,7 +97,7 @@ def get_decoder_start_token_id(self) -> Optional[int]:
return dec_start_token_id

def _get_default_enc_dec_decoder_prompt(self) -> list[int]:
-'''
+"""
Specifically for encoder/decoder models:
generate a default decoder prompt for when
the user specifies only the encoder prompt.
@@ -126,7 +126,7 @@ def _get_default_enc_dec_decoder_prompt(self) -> list[int]:
Returns:

* prompt_token_ids
-'''
+"""

bos_token_id = self.get_bos_token_id()
assert bos_token_id is not None
@@ -224,7 +224,10 @@ async def _tokenize_prompt_async(
lora_request: Optional[LoRARequest],
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> list[int]:
"""Async version of {meth}`_tokenize_prompt`."""
"""
Async version of
[`_tokenize_prompt`][vllm.inputs.preprocess.InputPreprocessor._tokenize_prompt].
"""
tokenizer = self.get_tokenizer_group()
tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs)

@@ -287,7 +290,10 @@ async def _process_multimodal_async(
lora_request: Optional[LoRARequest],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
"""Async version of {meth}`_process_multimodal`."""
"""
Async version of
[`_process_multimodal`][vllm.inputs.preprocess.InputPreprocessor._process_multimodal].
"""
tokenizer = await self._get_mm_tokenizer_async(lora_request)

mm_processor = self.mm_registry.create_processor(self.model_config,
@@ -472,7 +478,7 @@ def _prompt_to_llm_inputs(

Returns:

-* {class}`SingletonInputs` instance
+* [`SingletonInputs`][vllm.inputs.data.SingletonInputs] instance
"""
parsed = parse_singleton_prompt(prompt)

@@ -508,7 +514,10 @@ async def _prompt_to_llm_inputs_async(
lora_request: Optional[LoRARequest] = None,
return_mm_hashes: bool = False,
) -> SingletonInputs:
"""Async version of {meth}`_prompt_to_llm_inputs`."""
"""
Async version of
[`_prompt_to_llm_inputs`][vllm.inputs.preprocess.InputPreprocessor._prompt_to_llm_inputs].
"""
parsed = parse_singleton_prompt(prompt)

if parsed["type"] == "embeds":
@@ -644,7 +653,9 @@ def _process_encoder_decoder_prompt(
) -> EncoderDecoderInputs:
"""
For encoder/decoder models only:
-Process an input prompt into an {class}`EncoderDecoderInputs` instance.
+Process an input prompt into an
+[`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
+instance.

There are two types of input prompts:
singleton prompts which carry only the
Expand All @@ -670,7 +681,8 @@ def _process_encoder_decoder_prompt(

Returns:

-* {class}`EncoderDecoderInputs` instance
+* [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
+  instance
"""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]
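Reviewer's note: the two accepted prompt forms described in this docstring, with illustrative contents:

```python
# 1) A singleton prompt: the preprocessor supplies a default decoder prompt.
singleton_prompt = "Summarize: the quick brown fox jumps over the lazy dog."

# 2) An explicit pair: both the encoder and decoder prompts are spelled out.
explicit_prompt = {
    "encoder_prompt": "Summarize: the quick brown fox jumps over the lazy dog.",
    "decoder_prompt": "",
}
```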
@@ -710,7 +722,10 @@ async def _process_encoder_decoder_prompt_async(
prompt: PromptType,
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> EncoderDecoderInputs:
"""Async version of {meth}`_process_encoder_decoder_prompt`."""
"""
Async version of
[`_process_encoder_decoder_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_encoder_decoder_prompt].
"""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]

@@ -778,7 +793,8 @@ def _process_decoder_only_prompt(
) -> DecoderOnlyInputs:
"""
For decoder-only models:
-Process an input prompt into an {class}`DecoderOnlyInputs` instance.
+Process an input prompt into a
+[`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance.

Arguments:

Expand All @@ -789,7 +805,7 @@ def _process_decoder_only_prompt(

Returns:

-* {class}`DecoderOnlyInputs` instance
+* [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance
"""

prompt_comps = self._prompt_to_llm_inputs(
@@ -812,7 +828,10 @@
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> DecoderOnlyInputs:
"""Async version of {meth}`_process_decoder_only_prompt`."""
"""
Async version of
[`_process_decoder_only_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_decoder_only_prompt].
"""
prompt_comps = await self._prompt_to_llm_inputs_async(
prompt,
tokenization_kwargs=tokenization_kwargs,
@@ -863,7 +882,10 @@ async def preprocess_async(
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> ProcessorInputs:
"""Async version of {meth}`preprocess`."""
"""
Async version of
[`preprocess`][vllm.inputs.preprocess.InputPreprocessor.preprocess].
"""
if self.model_config.is_encoder_decoder:
assert not return_mm_hashes, (
"Multimodal hashes for encoder-decoder models should not be ",
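Reviewer's note: a sketch of the dispatch this method appears to perform, inferred from the hunks above; the control flow is an assumption, not a verbatim copy of the implementation:

```python
from typing import Any


async def preprocess_async_sketch(preprocessor: Any, prompt: Any) -> Any:
    """Inferred control flow of InputPreprocessor.preprocess_async."""
    if preprocessor.model_config.is_encoder_decoder:
        # Encoder/decoder models accept singleton or explicit enc/dec prompts.
        return await preprocessor._process_encoder_decoder_prompt_async(prompt)
    # Decoder-only models accept singleton prompts.
    return await preprocessor._process_decoder_only_prompt_async(prompt)
```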