5 changes: 3 additions & 2 deletions vllm/inputs/__init__.py
@@ -10,8 +10,9 @@

INPUT_REGISTRY = InputRegistry()
"""
-The global {class}`~InputRegistry` which is used by {class}`~vllm.LLMEngine`
-to dispatch data processing according to the target model.
+The global [`InputRegistry`][vllm.inputs.registry.InputRegistry] which is used
+by [`LLMEngine`][vllm.LLMEngine] to dispatch data processing according to the
+target model.
"""

__all__ = [
68 changes: 39 additions & 29 deletions vllm/inputs/data.py
@@ -80,22 +80,24 @@ class EmbedsPrompt(TypedDict):
"""
Set of possible schemas for a single prompt:

-- A text prompt ({class}`str` or {class}`TextPrompt`)
-- A tokenized prompt ({class}`TokensPrompt`)
-- An embeddings prompt ({class}`EmbedsPrompt`)
+- A text prompt ([`str`][] or [`TextPrompt`][vllm.inputs.data.TextPrompt])
+- A tokenized prompt ([`TokensPrompt`][vllm.inputs.data.TokensPrompt])
+- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt])

Note that "singleton" is as opposed to a data structure
which encapsulates multiple prompts, i.e. of the sort
which may be utilized for encoder/decoder models when
the user desires to express both the encoder & decoder
-prompts explicitly, i.e. {class}`ExplicitEncoderDecoderPrompt`
+prompts explicitly, i.e.
+[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]

-A prompt of type {class}`SingletonPrompt` may be employed
-as (1) input to a decoder-only model, (2) input to
+A prompt of type [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] may be
+employed as (1) input to a decoder-only model, (2) input to
the encoder of an encoder/decoder model, in the scenario
where the decoder-prompt is not specified explicitly, or
(3) as a member of a larger data structure encapsulating
-more than one prompt, i.e. {class}`ExplicitEncoderDecoderPrompt`
+more than one prompt, i.e.
+[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
"""


@@ -126,18 +128,20 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
comprising an explicit encoder prompt and a decoder prompt.

The encoder and decoder prompts, respectively, may be formatted
-according to any of the {class}`SingletonPrompt` schemas,
+according to any of the
+[`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] schemas,
and are not required to have the same schema.

Only the encoder prompt may have multi-modal data. mm_processor_kwargs
should be at the top-level, and should not be set in the encoder/decoder
prompts, since they are agnostic to the encoder/decoder.

-Note that an {class}`ExplicitEncoderDecoderPrompt` may not
-be used as an input to a decoder-only model,
+Note that an
+[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
+may not be used as an input to a decoder-only model,
and that the `encoder_prompt` and `decoder_prompt`
fields of this data structure themselves must be
-{class}`SingletonPrompt` instances.
+[`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] instances.
"""

encoder_prompt: _T1_co
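Reviewer's note: a sketch of the mixed-schema pairing this docstring allows, assuming mm_processor_kwargs may be omitted; the prompt contents are illustrative:

```python
from vllm.inputs.data import (ExplicitEncoderDecoderPrompt, TextPrompt,
                              TokensPrompt)

# The encoder and decoder sides need not use the same singleton schema.
enc_dec_prompt = ExplicitEncoderDecoderPrompt(
    encoder_prompt=TextPrompt(prompt="A document for the encoder side."),
    decoder_prompt=TokensPrompt(prompt_token_ids=[0]),  # e.g. a BOS-only start
)
```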
@@ -152,11 +156,11 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
Set of possible schemas for an LLM input, including
both decoder-only and encoder/decoder input types:

-- A text prompt ({class}`str` or {class}`TextPrompt`)
-- A tokenized prompt ({class}`TokensPrompt`)
-- An embeddings prompt ({class}`EmbedsPrompt`)
+- A text prompt ([`str`][] or [`TextPrompt`][vllm.inputs.data.TextPrompt])
+- A tokenized prompt ([`TokensPrompt`][vllm.inputs.data.TokensPrompt])
+- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt])
- A single data structure containing both an encoder and a decoder prompt
-  ({class}`ExplicitEncoderDecoderPrompt`)
+  ([`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt])
"""


@@ -189,7 +193,8 @@ def token_inputs(
prompt: Optional[str] = None,
cache_salt: Optional[str] = None,
) -> TokenInputs:
"""Construct {class}`TokenInputs` from optional values."""
"""Construct [`TokenInputs`][vllm.inputs.data.TokenInputs] from optional
values."""
inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids)

if prompt is not None:
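Reviewer's note: a usage sketch of this helper; the token ids are invented:

```python
from vllm.inputs.data import token_inputs

inputs = token_inputs(prompt_token_ids=[1, 2, 3], prompt="hi")
assert inputs["type"] == "token"
assert inputs["prompt_token_ids"] == [1, 2, 3]
```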
@@ -221,7 +226,8 @@ def embeds_inputs(
prompt_embeds: torch.Tensor,
cache_salt: Optional[str] = None,
) -> EmbedsInputs:
"""Construct :class:`EmbedsInputs` from optional values."""
"""Construct [`EmbedsInputs`][vllm.inputs.data.EmbedsInputs] from optional
values."""
inputs = EmbedsInputs(type="embeds", prompt_embeds=prompt_embeds)

if cache_salt is not None:
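Reviewer's note: the matching sketch for the embeddings constructor; the tensor shape and cache salt are invented:

```python
import torch

from vllm.inputs.data import embeds_inputs

inputs = embeds_inputs(prompt_embeds=torch.randn(4, 4096), cache_salt="demo")
assert inputs["type"] == "embeds"
```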
@@ -232,19 +238,20 @@

DecoderOnlyInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"]
"""
-The inputs in {class}`~vllm.LLMEngine` before they are
+The inputs in [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] before they are
passed to the model executor.
This specifies the data required for decoder-only models.
"""


class EncoderDecoderInputs(TypedDict):
"""
-The inputs in {class}`~vllm.LLMEngine` before they are
-passed to the model executor.
+The inputs in [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] before they
+are passed to the model executor.

This specifies the required data for encoder-decoder models.
"""

encoder: Union[TokenInputs, "MultiModalInputs"]
"""The inputs for the encoder portion."""

@@ -254,13 +261,13 @@ class EncoderDecoderInputs(TypedDict):

SingletonInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"]
"""
-A processed {class}`SingletonPrompt` which can be passed to
-{class}`vllm.sequence.Sequence`.
+A processed [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] which can be
+passed to [`vllm.sequence.Sequence`][].
"""

ProcessorInputs = Union[DecoderOnlyInputs, EncoderDecoderInputs]
"""
-The inputs to {data}`vllm.inputs.InputProcessor`.
+The outputs from [`vllm.inputs.preprocess.InputPreprocessor`][].
"""

[Inline comment from the PR author: I fixed the incorrect description of ProcessorInputs.]

_T1 = TypeVar("_T1", bound=SingletonPrompt, default=SingletonPrompt)
@@ -277,7 +284,8 @@ def build_explicit_enc_dec_prompt(
return ExplicitEncoderDecoderPrompt(
encoder_prompt=encoder_prompt,
decoder_prompt=decoder_prompt,
-mm_processor_kwargs=mm_processor_kwargs)
+mm_processor_kwargs=mm_processor_kwargs,
+)


def zip_enc_dec_prompts(
Expand All @@ -288,7 +296,8 @@ def zip_enc_dec_prompts(
) -> list[ExplicitEncoderDecoderPrompt[_T1, _T2]]:
"""
Zip encoder and decoder prompts together into a list of
-{class}`ExplicitEncoderDecoderPrompt` instances.
+[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
+instances.

``mm_processor_kwargs`` may also be provided; if a dict is passed, the same
dictionary will be used for every encoder/decoder prompt. If an iterable is
@@ -299,10 +308,11 @@ def zip_enc_dec_prompts(
if isinstance(mm_processor_kwargs, dict):
return [
build_explicit_enc_dec_prompt(
-encoder_prompt, decoder_prompt,
-cast(dict[str, Any], mm_processor_kwargs))
-for (encoder_prompt,
-decoder_prompt) in zip(enc_prompts, dec_prompts)
+encoder_prompt,
+decoder_prompt,
+cast(dict[str, Any], mm_processor_kwargs),
+) for (encoder_prompt,
+decoder_prompt) in zip(enc_prompts, dec_prompts)
]
return [
build_explicit_enc_dec_prompt(encoder_prompt, decoder_prompt,
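Reviewer's note: a sketch of the shared-dict behavior described in the docstring above; the num_crops kwarg is hypothetical:

```python
from vllm.inputs.data import zip_enc_dec_prompts

pairs = zip_enc_dec_prompts(
    ["encoder one", "encoder two"],
    ["decoder one", "decoder two"],
    mm_processor_kwargs={"num_crops": 4},  # one dict, reused for every pair
)
assert pairs[0]["encoder_prompt"] == "encoder one"
assert pairs[1]["mm_processor_kwargs"] == {"num_crops": 4}
```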
8 changes: 4 additions & 4 deletions vllm/inputs/parse.py
@@ -23,13 +23,13 @@ class ParsedTokens(TypedDict):

@overload
def parse_and_batch_prompt(
-prompt: Union[str, list[str]]) -> Sequence[ParsedText]:
+prompt: Union[str, list[str]], ) -> Sequence[ParsedText]:
...


@overload
def parse_and_batch_prompt(
-prompt: Union[list[int], list[list[int]]]) -> Sequence[ParsedTokens]:
+prompt: Union[list[int], list[list[int]]], ) -> Sequence[ParsedTokens]:
...


@@ -86,7 +86,7 @@ class ParsedTokensPrompt(TypedDict):


class ParsedEmbedsPrompt(TypedDict):
-type: Literal['embeds']
+type: Literal["embeds"]
content: EmbedsPrompt


@@ -133,7 +133,7 @@ def parse_singleton_prompt(prompt: SingletonPrompt) -> ParsedSingletonPrompt:


def is_explicit_encoder_decoder_prompt(
-prompt: PromptType) -> TypeIs[ExplicitEncoderDecoderPrompt]:
+prompt: PromptType, ) -> TypeIs[ExplicitEncoderDecoderPrompt]:
return isinstance(prompt, dict) and "encoder_prompt" in prompt


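Reviewer's note: per the body shown above, the type guard's runtime behavior is just a key check, e.g.:

```python
from vllm.inputs.parse import is_explicit_encoder_decoder_prompt

assert is_explicit_encoder_decoder_prompt(
    {"encoder_prompt": "a document", "decoder_prompt": "summary:"})
assert not is_explicit_encoder_decoder_prompt("a plain text prompt")
```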
56 changes: 39 additions & 17 deletions vllm/inputs/preprocess.py
@@ -67,26 +67,26 @@ def get_eos_token_id(self,
return self.tokenizer.get_lora_tokenizer(lora_request).eos_token_id

def get_decoder_start_token_id(self) -> Optional[int]:
-'''
+"""
Obtain the decoder start token id employed by an encoder/decoder
model. Returns None for non-encoder/decoder models or if the
model config is unavailable.
-'''
+"""

if not self.model_config.is_encoder_decoder:
logger.warning_once(
"Using None for decoder start token id because "
"this is not an encoder/decoder model.")
return None

-if (self.model_config is None or self.model_config.hf_config is None):
+if self.model_config is None or self.model_config.hf_config is None:
logger.warning_once(
"Using None for decoder start token id because "
"model config is not available.")
return None

dec_start_token_id = getattr(self.model_config.hf_config,
-'decoder_start_token_id', None)
+"decoder_start_token_id", None)
if dec_start_token_id is None:
logger.warning_once(
"Falling back on <BOS> for decoder start token "
@@ -97,7 +97,7 @@ def get_decoder_start_token_id(self) -> Optional[int]:
return dec_start_token_id

def _get_default_enc_dec_decoder_prompt(self) -> list[int]:
-'''
+"""
Specifically for encoder/decoder models:
generate a default decoder prompt for when
the user specifies only the encoder prompt.
@@ -126,7 +126,7 @@ def _get_default_enc_dec_decoder_prompt(self) -> list[int]:
Returns:

* prompt_token_ids
-'''
+"""

bos_token_id = self.get_bos_token_id()
assert bos_token_id is not None
@@ -224,7 +224,10 @@ async def _tokenize_prompt_async(
lora_request: Optional[LoRARequest],
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> list[int]:
"""Async version of {meth}`_tokenize_prompt`."""
"""
Async version of
[`_tokenize_prompt`][vllm.inputs.preprocess.InputPreprocessor._tokenize_prompt].
"""
tokenizer = self.get_tokenizer_group()
tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs)

@@ -287,7 +290,10 @@ async def _process_multimodal_async(
lora_request: Optional[LoRARequest],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
"""Async version of {meth}`_process_multimodal`."""
"""
Async version of
[`_process_multimodal`][vllm.inputs.preprocess.InputPreprocessor._process_multimodal].
"""
tokenizer = await self._get_mm_tokenizer_async(lora_request)

mm_processor = self.mm_registry.create_processor(self.model_config,
@@ -472,7 +478,7 @@ def _prompt_to_llm_inputs(

Returns:

-* {class}`SingletonInputs` instance
+* [`SingletonInputs`][vllm.inputs.data.SingletonInputs] instance
"""
parsed = parse_singleton_prompt(prompt)

@@ -508,7 +514,10 @@ async def _prompt_to_llm_inputs_async(
lora_request: Optional[LoRARequest] = None,
return_mm_hashes: bool = False,
) -> SingletonInputs:
"""Async version of {meth}`_prompt_to_llm_inputs`."""
"""
Async version of
[`_prompt_to_llm_inputs`][vllm.inputs.preprocess.InputPreprocessor._prompt_to_llm_inputs].
"""
parsed = parse_singleton_prompt(prompt)

if parsed["type"] == "embeds":
@@ -644,7 +653,9 @@ def _process_encoder_decoder_prompt(
) -> EncoderDecoderInputs:
"""
For encoder/decoder models only:
-Process an input prompt into an {class}`EncoderDecoderInputs` instance.
+Process an input prompt into an
+[`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
+instance.

There are two types of input prompts:
singleton prompts which carry only the
Expand All @@ -670,7 +681,8 @@ def _process_encoder_decoder_prompt(

Returns:

-* {class}`EncoderDecoderInputs` instance
+* [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
+  instance
"""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]
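Reviewer's note: the two accepted prompt forms described in this docstring, with illustrative contents:

```python
# 1) A singleton prompt: the preprocessor supplies a default decoder prompt.
singleton_prompt = "Summarize: the quick brown fox jumps over the lazy dog."

# 2) An explicit pair: both the encoder and decoder prompts are spelled out.
explicit_prompt = {
    "encoder_prompt": "Summarize: the quick brown fox jumps over the lazy dog.",
    "decoder_prompt": "",
}
```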
@@ -710,7 +722,10 @@ async def _process_encoder_decoder_prompt_async(
prompt: PromptType,
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> EncoderDecoderInputs:
"""Async version of {meth}`_process_encoder_decoder_prompt`."""
"""
Async version of
[`_process_encoder_decoder_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_encoder_decoder_prompt].
"""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]

@@ -778,7 +793,8 @@ def _process_decoder_only_prompt(
) -> DecoderOnlyInputs:
"""
For decoder-only models:
-Process an input prompt into an {class}`DecoderOnlyInputs` instance.
+Process an input prompt into a
+[`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance.

Arguments:

Expand All @@ -789,7 +805,7 @@ def _process_decoder_only_prompt(

Returns:

-* {class}`DecoderOnlyInputs` instance
+* [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance
"""

prompt_comps = self._prompt_to_llm_inputs(
@@ -812,7 +828,10 @@
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> DecoderOnlyInputs:
"""Async version of {meth}`_process_decoder_only_prompt`."""
"""
Async version of
[`_process_decoder_only_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_decoder_only_prompt].
"""
prompt_comps = await self._prompt_to_llm_inputs_async(
prompt,
tokenization_kwargs=tokenization_kwargs,
@@ -863,7 +882,10 @@ async def preprocess_async(
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> ProcessorInputs:
"""Async version of {meth}`preprocess`."""
"""
Async version of
[`preprocess`][vllm.inputs.preprocess.InputPreprocessor.preprocess].
"""
if self.model_config.is_encoder_decoder:
assert not return_mm_hashes, (
"Multimodal hashes for encoder-decoder models should not be ",
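Reviewer's note: a sketch of the dispatch this method appears to perform, inferred from the hunks above; the control flow is an assumption, not a verbatim copy of the implementation:

```python
from typing import Any


async def preprocess_async_sketch(preprocessor: Any, prompt: Any) -> Any:
    """Inferred control flow of InputPreprocessor.preprocess_async."""
    if preprocessor.model_config.is_encoder_decoder:
        # Encoder/decoder models accept singleton or explicit enc/dec prompts.
        return await preprocessor._process_encoder_decoder_prompt_async(prompt)
    # Decoder-only models accept singleton prompts.
    return await preprocessor._process_decoder_only_prompt_async(prompt)
```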