2929
3030HfImageItem : TypeAlias = Union ["Image" , np .ndarray , "torch.Tensor" ]
3131"""
32- A {class} `transformers.image_utils.ImageInput` representing a single image
32+ A `transformers.image_utils.ImageInput` representing a single image
3333item, which can be passed to a HuggingFace `ImageProcessor`.
3434"""
3535
3636HfVideoItem : TypeAlias = Union [list ["Image" ], np .ndarray , "torch.Tensor" ,
3737 list [np .ndarray ], list ["torch.Tensor" ]]
3838"""
39- A {class} `transformers.image_utils.VideoInput` representing a single video
39+ A `transformers.image_utils.VideoInput` representing a single video
4040item, which can be passed to a HuggingFace `VideoProcessor`.
4141"""
4242
4848
4949ImageItem : TypeAlias = Union [HfImageItem , "torch.Tensor" ]
5050"""
51- A {class} `transformers.image_utils.ImageInput` representing a single image
51+ A `transformers.image_utils.ImageInput` representing a single image
5252item, which can be passed to a HuggingFace `ImageProcessor`.
5353
5454Alternatively, a 3-D tensor or batch of 2-D tensors,
5858
5959VideoItem : TypeAlias = Union [HfVideoItem , "torch.Tensor" ]
6060"""
61- A {class} `transformers.image_utils.VideoInput` representing a single video
61+ A `transformers.image_utils.VideoInput` representing a single video
6262item, which can be passed to a HuggingFace `VideoProcessor`.
6363
6464Alternatively, a 3-D tensor or batch of 2-D tensors,
@@ -108,7 +108,8 @@ class MultiModalDataBuiltins(TypedDict, total=False):
108108"""
109109A dictionary containing an entry for each modality type to input.
110110
111- The built-in modalities are defined by {class}`MultiModalDataBuiltins`.
111+ The built-in modalities are defined by
112+ [`MultiModalDataBuiltins`][vllm.multimodal.inputs.MultiModalDataBuiltins].
112113"""
113114
114115
@@ -169,7 +170,8 @@ def __eq__(self, other: object) -> bool:
169170
170171
171172def nested_tensors_equal (a : NestedTensors , b : NestedTensors ) -> bool :
172- """Equality check between {data}`NestedTensors` objects."""
173+ """Equality check between
174+ [`NestedTensors`][vllm.multimodal.inputs.NestedTensors] objects."""
173175 if isinstance (a , torch .Tensor ):
174176 return isinstance (b , torch .Tensor ) and torch .equal (a , b )
175177 elif isinstance (b , torch .Tensor ):
@@ -189,15 +191,15 @@ def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool:
189191BatchedTensorInputs : TypeAlias = Mapping [str , NestedTensors ]
190192"""
191193A dictionary containing nested tensors which have been batched via
192- {meth} `MultiModalKwargs.batch`.
194+ [`MultiModalKwargs.batch`][vllm.multimodal.inputs.MultiModalKwargs.batch].
193195"""
194196
195197
196198@dataclass (frozen = True )
197199class MultiModalFieldElem :
198200 """
199201 Represents a keyword argument corresponding to a multi-modal item
200- in {class} `MultiModalKwargs`.
202+ in [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs].
201203 """
202204
203205 modality : str
@@ -208,13 +210,15 @@ class MultiModalFieldElem:
208210
209211 key : str
210212 """
211- The key of this field in {class}`MultiModalKwargs`,
213+ The key of this field in
214+ [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs],
212215 i.e. the name of the keyword argument to be passed to the model.
213216 """
214217
215218 data : NestedTensors
216219 """
217- The tensor data of this field in {class}`MultiModalKwargs`,
220+ The tensor data of this field in
221+ [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs],
218222 i.e. the value of the keyword argument to be passed to the model.
219223 """
220224
@@ -237,7 +241,8 @@ def __eq__(self, other: object) -> bool:
237241class BaseMultiModalField (ABC ):
238242 """
239243 Defines how to interpret tensor data belonging to a keyword argument in
240- {class}`MultiModalKwargs` for multiple multi-modal items, and vice versa.
244+ [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs] for multiple
245+ multi-modal items, and vice versa.
241246 """
242247
243248 def _field_factory (self , * , modality : str , key : str ):
@@ -262,10 +267,12 @@ def build_elems(
262267 data : NestedTensors ,
263268 ) -> Sequence [MultiModalFieldElem ]:
264269 """
265- Construct {class}`MultiModalFieldElem` instances to represent
266- the provided data.
270+ Construct
271+ [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem]
272+ instances to represent the provided data.
267273
268- This is the inverse of {meth}`reduce_data`.
274+ This is the inverse of
275+ [`reduce_data`][vllm.multimodal.inputs.BaseMultiModalField.reduce_data].
269276 """
270277 raise NotImplementedError
271278
@@ -275,9 +282,11 @@ def _reduce_data(self, batch: list[NestedTensors]) -> NestedTensors:
275282
276283 def reduce_data (self , elems : list [MultiModalFieldElem ]) -> NestedTensors :
277284 """
278- Merge the data from multiple instances of {class}`MultiModalFieldElem`.
285+ Merge the data from multiple instances of
286+ [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem].
279287
280- This is the inverse of {meth}`build_elems`.
288+ This is the inverse of
289+ [`build_elems`][vllm.multimodal.inputs.BaseMultiModalField.build_elems].
281290 """
282291 field_types = [type (item .field ) for item in elems ]
283292 if len (set (field_types )) > 1 :
@@ -290,7 +299,7 @@ def reduce_data(self, elems: list[MultiModalFieldElem]) -> NestedTensors:
290299class MultiModalBatchedField (BaseMultiModalField ):
291300 """
292301 Info:
293- [MultiModalFieldConfig.batched][ ]
302+ [`MultiModalFieldConfig.batched`][vllm.multimodal.inputs.MultiModalFieldConfig.batched]
294303 """
295304
296305 def build_elems (
@@ -320,8 +329,8 @@ def _reduce_data(self, batch: list[NestedTensors]) -> NestedTensors:
320329class MultiModalFlatField (BaseMultiModalField ):
321330 """
322331 Info:
323- [MultiModalFieldConfig.flat][ ]
324- [MultiModalFieldConfig.flat_from_sizes][ ]
332+ [`MultiModalFieldConfig.flat`][vllm.multimodal.inputs.MultiModalFieldConfig.flat]
333+ [`MultiModalFieldConfig.flat_from_sizes`][vllm.multimodal.inputs.MultiModalFieldConfig.flat_from_sizes]
325334 """
326335 slices : Union [Sequence [slice ], Sequence [Sequence [slice ]]]
327336 dim : int = 0
@@ -362,7 +371,7 @@ def _expect_same_shape(tensor: torch.Tensor):
362371class MultiModalSharedField (BaseMultiModalField ):
363372 """
364373 Info:
365- [MultiModalFieldConfig.shared][ ]
374+ [`MultiModalFieldConfig.shared`][vllm.multimodal.inputs.MultiModalFieldConfig.shared]
366375 """
367376 batch_size : int
368377
@@ -508,7 +517,7 @@ def flat_from_sizes(modality: str,
508517 ```
509518
510519 Info:
511- [MultiModalFieldConfig.flat][ ]
520+ [`MultiModalFieldConfig.flat`][vllm.multimodal.inputs.MultiModalFieldConfig.flat]
512521 """
513522
514523 if size_per_item .ndim != 1 :
@@ -572,8 +581,10 @@ def build_elems(
572581
573582class MultiModalKwargsItem (UserDict [str , MultiModalFieldElem ]):
574583 """
575- A collection of {class}`MultiModalFieldElem`
576- corresponding to a data item in {class}`MultiModalDataItems`.
584+ A collection of
585+ [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem]
586+ corresponding to a data item in
587+ [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems].
577588 """
578589
579590 @staticmethod
@@ -592,11 +603,13 @@ def modality(self) -> str:
592603class MultiModalKwargs (UserDict [str , NestedTensors ]):
593604 """
594605 A dictionary that represents the keyword arguments to
595- {meth}`~ torch.nn.Module.forward`.
606+ [`torch.nn.Module.forward`][].
596607
597608 The metadata `items` enables us to obtain the keyword arguments
598- corresponding to each data item in {class}`MultiModalDataItems`, via
599- {meth}`get_item` and {meth}`get_items`.
609+ corresponding to each data item in
610+ [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems], via
611+ [`get_item`][vllm.multimodal.inputs.MultiModalKwargs.get_item] and
612+ [`get_items`][vllm.multimodal.inputs.MultiModalKwargs.get_items].
600613 """
601614
602615 @staticmethod
@@ -635,7 +648,9 @@ def from_hf_inputs(
635648
636649 @staticmethod
637650 def from_items (items : Sequence [MultiModalKwargsItem ]):
638- """Construct a new {class}`MultiModalKwargs` from multiple items."""
651+ """Construct a new
652+ [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs]
653+ from multiple items."""
639654 elems_by_key = defaultdict [str , list [MultiModalFieldElem ]](list )
640655 for item in items :
641656 for key , elem in item .items ():
@@ -800,7 +815,7 @@ def get_items(self, modality: str) -> Sequence[MultiModalKwargsItem]:
800815class MultiModalInputs (TypedDict ):
801816 """
802817 Represents the outputs of
803- {class}` vllm.multimodal.processing.BaseMultiModalProcessor` ,
818+ [`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor],
804819 ready to be passed to vLLM internals.
805820 """
806821
@@ -836,7 +851,8 @@ class MultiModalInputs(TypedDict):
836851
837852class MultiModalEncDecInputs (MultiModalInputs ):
838853 """
839- Represents the outputs of {class}`vllm.multimodal.EncDecMultiModalProcessor`
854+ Represents the outputs of
855+ [`EncDecMultiModalProcessor`][vllm.multimodal.processing.EncDecMultiModalProcessor],
840856 ready to be passed to vLLM internals.
841857 """
842858
0 commit comments