
Commit 12fc653

docs: class migration from sphinx to mkdocs (multimodal)
Signed-off-by: Zerohertz <[email protected]>
1 parent fe8ee76 commit 12fc653
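The change is mechanical: MyST/Sphinx cross-reference roles such as `{class}`, `{meth}`, `{data}`, and `{exc}` become mkdocstrings/autorefs links of the form [`display text`][fully.qualified.name], where the bracketed identifier is the fully-qualified object path (an empty identifier, as in [`KeyError`][], resolves from the link text itself). A representative before/after, taken from one of the hunks below:

```python
# Before (Sphinx/MyST): roles like {class} are resolved by Sphinx's
# autodoc machinery.
"""
The key of this field in {class}`MultiModalKwargs`,
i.e. the name of the keyword argument to be passed to the model.
"""

# After (MkDocs): mkdocstrings/autorefs resolve the bracketed identifier,
# which is the fully-qualified object path.
"""
The key of this field in
[`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs],
i.e. the name of the keyword argument to be passed to the model.
"""
```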

File tree: 7 files changed, +119 −80 lines

vllm/multimodal/__init__.py

Lines changed: 3 additions & 5 deletions

```diff
@@ -8,11 +8,9 @@
 
 MULTIMODAL_REGISTRY = MultiModalRegistry()
 """
-The global {class}`~MultiModalRegistry` is used by model runners to
-dispatch data processing according to the target model.
-
-Info:
-    [mm-processing][]
+The global [`MultiModalRegistry`][vllm.multimodal.registry.MultiModalRegistry]
+is used by model runners to dispatch data processing according to the target
+model.
 """
 
 __all__ = [
```
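A side note on the pattern being edited here: the string literal after the `MULTIMODAL_REGISTRY` assignment is an attribute docstring. Python discards it at runtime, but static documentation tools (griffe, which powers mkdocstrings, as well as Sphinx autodoc) attach it to the preceding name. A generic sketch with hypothetical names:

```python
# Attribute docstring pattern: the bare string literal below is not bound to
# any name at runtime, yet doc generators associate it with REGISTRY.
REGISTRY: dict[str, str] = {}
"""Maps model names to processing backends (illustrative placeholder)."""
```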

vllm/multimodal/inputs.py

Lines changed: 45 additions & 29 deletions

````diff
@@ -29,14 +29,14 @@
 
 HfImageItem: TypeAlias = Union["Image", np.ndarray, "torch.Tensor"]
 """
-A {class}`transformers.image_utils.ImageInput` representing a single image
+A `transformers.image_utils.ImageInput` representing a single image
 item, which can be passed to a HuggingFace `ImageProcessor`.
 """
 
 HfVideoItem: TypeAlias = Union[list["Image"], np.ndarray, "torch.Tensor",
                                list[np.ndarray], list["torch.Tensor"]]
 """
-A {class}`transformers.image_utils.VideoInput` representing a single video
+A `transformers.image_utils.VideoInput` representing a single video
 item, which can be passed to a HuggingFace `VideoProcessor`.
 """
 
@@ -48,7 +48,7 @@
 
 ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor"]
 """
-A {class}`transformers.image_utils.ImageInput` representing a single image
+A `transformers.image_utils.ImageInput` representing a single image
 item, which can be passed to a HuggingFace `ImageProcessor`.
 
 Alternatively, a 3-D tensor or batch of 2-D tensors,
@@ -58,7 +58,7 @@
 
 VideoItem: TypeAlias = Union[HfVideoItem, "torch.Tensor"]
 """
-A {class}`transformers.image_utils.VideoInput` representing a single video
+A `transformers.image_utils.VideoInput` representing a single video
 item, which can be passed to a HuggingFace `VideoProcessor`.
 
 Alternatively, a 3-D tensor or batch of 2-D tensors,
@@ -108,7 +108,8 @@ class MultiModalDataBuiltins(TypedDict, total=False):
 """
 A dictionary containing an entry for each modality type to input.
 
-The built-in modalities are defined by {class}`MultiModalDataBuiltins`.
+The built-in modalities are defined by
+[`MultiModalDataBuiltins`][vllm.multimodal.inputs.MultiModalDataBuiltins].
 """
 
 
@@ -169,7 +170,8 @@ def __eq__(self, other: object) -> bool:
 
 
 def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool:
-    """Equality check between {data}`NestedTensors` objects."""
+    """Equality check between
+    [`NestedTensors`][vllm.multimodal.inputs.NestedTensors] objects."""
     if isinstance(a, torch.Tensor):
         return isinstance(b, torch.Tensor) and torch.equal(a, b)
     elif isinstance(b, torch.Tensor):
@@ -189,15 +191,15 @@ def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool:
 BatchedTensorInputs: TypeAlias = Mapping[str, NestedTensors]
 """
 A dictionary containing nested tensors which have been batched via
-{meth}`MultiModalKwargs.batch`.
+[`MultiModalKwargs.batch`][vllm.multimodal.inputs.MultiModalKwargs.batch].
 """
 
 
 @dataclass(frozen=True)
 class MultiModalFieldElem:
     """
     Represents a keyword argument corresponding to a multi-modal item
-    in {class}`MultiModalKwargs`.
+    in [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs].
     """
 
     modality: str
@@ -208,13 +210,15 @@ class MultiModalFieldElem:
 
     key: str
     """
-    The key of this field in {class}`MultiModalKwargs`,
+    The key of this field in
+    [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs],
     i.e. the name of the keyword argument to be passed to the model.
     """
 
     data: NestedTensors
     """
-    The tensor data of this field in {class}`MultiModalKwargs`,
+    The tensor data of this field in
+    [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs],
     i.e. the value of the keyword argument to be passed to the model.
     """
 
@@ -237,7 +241,8 @@ def __eq__(self, other: object) -> bool:
 class BaseMultiModalField(ABC):
     """
     Defines how to interpret tensor data belonging to a keyword argument in
-    {class}`MultiModalKwargs` for multiple multi-modal items, and vice versa.
+    [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs] for multiple
+    multi-modal items, and vice versa.
     """
 
     def _field_factory(self, *, modality: str, key: str):
@@ -262,10 +267,12 @@ def build_elems(
         data: NestedTensors,
     ) -> Sequence[MultiModalFieldElem]:
         """
-        Construct {class}`MultiModalFieldElem` instances to represent
-        the provided data.
+        Construct
+        [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem]
+        instances to represent the provided data.
 
-        This is the inverse of {meth}`reduce_data`.
+        This is the inverse of
+        [`reduce_data`][vllm.multimodal.inputs.BaseMultiModalField.reduce_data].
         """
         raise NotImplementedError
 
@@ -275,9 +282,11 @@ def _reduce_data(self, batch: list[NestedTensors]) -> NestedTensors:
 
     def reduce_data(self, elems: list[MultiModalFieldElem]) -> NestedTensors:
         """
-        Merge the data from multiple instances of {class}`MultiModalFieldElem`.
+        Merge the data from multiple instances of
+        [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem].
 
-        This is the inverse of {meth}`build_elems`.
+        This is the inverse of
+        [`build_elems`][vllm.multimodal.inputs.BaseMultiModalField.build_elems].
         """
         field_types = [type(item.field) for item in elems]
         if len(set(field_types)) > 1:
@@ -290,7 +299,7 @@ def reduce_data(self, elems: list[MultiModalFieldElem]) -> NestedTensors:
 class MultiModalBatchedField(BaseMultiModalField):
     """
     Info:
-        [MultiModalFieldConfig.batched][]
+        [`MultiModalFieldConfig.batched`][vllm.multimodal.inputs.MultiModalFieldConfig.batched]
     """
 
     def build_elems(
@@ -320,8 +329,8 @@ def _reduce_data(self, batch: list[NestedTensors]) -> NestedTensors:
 class MultiModalFlatField(BaseMultiModalField):
     """
     Info:
-        [MultiModalFieldConfig.flat][]
-        [MultiModalFieldConfig.flat_from_sizes][]
+        [`MultiModalFieldConfig.flat`][vllm.multimodal.inputs.MultiModalFieldConfig.flat]
+        [`MultiModalFieldConfig.flat_from_sizes`][vllm.multimodal.inputs.MultiModalFieldConfig.flat_from_sizes]
     """
     slices: Union[Sequence[slice], Sequence[Sequence[slice]]]
     dim: int = 0
@@ -362,7 +371,7 @@ def _expect_same_shape(tensor: torch.Tensor):
 class MultiModalSharedField(BaseMultiModalField):
     """
     Info:
-        [MultiModalFieldConfig.shared][]
+        [`MultiModalFieldConfig.shared`][vllm.multimodal.inputs.MultiModalFieldConfig.shared]
     """
     batch_size: int
 
@@ -508,7 +517,7 @@ def flat_from_sizes(modality: str,
         ```
 
         Info:
-            [MultiModalFieldConfig.flat][]
+            [`MultiModalFieldConfig.flat`][vllm.multimodal.inputs.MultiModalFieldConfig.flat]
         """
 
         if size_per_item.ndim != 1:
@@ -572,8 +581,10 @@ def build_elems(
 
 class MultiModalKwargsItem(UserDict[str, MultiModalFieldElem]):
     """
-    A collection of {class}`MultiModalFieldElem`
-    corresponding to a data item in {class}`MultiModalDataItems`.
+    A collection of
+    [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem]
+    corresponding to a data item in
+    [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems].
     """
 
     @staticmethod
@@ -592,11 +603,13 @@ def modality(self) -> str:
 class MultiModalKwargs(UserDict[str, NestedTensors]):
     """
     A dictionary that represents the keyword arguments to
-    {meth}`~torch.nn.Module.forward`.
+    [`torch.nn.Module.forward`][].
 
     The metadata `items` enables us to obtain the keyword arguments
-    corresponding to each data item in {class}`MultiModalDataItems`, via
-    {meth}`get_item` and {meth}`get_items`.
+    corresponding to each data item in
+    [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems], via
+    [`get_item`][vllm.multimodal.inputs.MultiModalKwargs.get_item] and
+    [`get_items`][vllm.multimodal.inputs.MultiModalKwargs.get_items].
     """
 
     @staticmethod
@@ -635,7 +648,9 @@ def from_hf_inputs(
 
     @staticmethod
     def from_items(items: Sequence[MultiModalKwargsItem]):
-        """Construct a new {class}`MultiModalKwargs` from multiple items."""
+        """Construct a new
+        [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs]
+        from multiple items."""
        elems_by_key = defaultdict[str, list[MultiModalFieldElem]](list)
        for item in items:
            for key, elem in item.items():
@@ -800,7 +815,7 @@ def get_items(self, modality: str) -> Sequence[MultiModalKwargsItem]:
 class MultiModalInputs(TypedDict):
     """
     Represents the outputs of
-    {class}`vllm.multimodal.processing.BaseMultiModalProcessor`,
+    [`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor],
     ready to be passed to vLLM internals.
     """
 
@@ -836,7 +851,8 @@ class MultiModalInputs(TypedDict):
 
 class MultiModalEncDecInputs(MultiModalInputs):
     """
-    Represents the outputs of {class}`vllm.multimodal.EncDecMultiModalProcessor`
+    Represents the outputs of
+    [`EncDecMultiModalProcessor`][vllm.multimodal.processing.EncDecMultiModalProcessor]
     ready to be passed to vLLM internals.
     """
 
````
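For orientation, a small usage sketch of `nested_tensors_equal`, whose docstring is touched above. The leaf comparison via `torch.equal` is visible in the hunk; the recursive handling of nested lists is inferred from the `NestedTensors` definition rather than shown in this diff:

```python
import torch

from vllm.multimodal.inputs import nested_tensors_equal

# NestedTensors is either a tensor or tensors nested inside lists; equality
# is structural, with torch.equal comparing the tensor leaves.
a = [torch.ones(2, 3), [torch.zeros(4)]]
b = [torch.ones(2, 3), [torch.zeros(4)]]
assert nested_tensors_equal(a, b)

# Any mismatched leaf (here a different shape) fails the whole check.
assert not nested_tensors_equal(a, [torch.ones(2, 3), [torch.zeros(5)]])
```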

vllm/multimodal/parse.py

Lines changed: 7 additions & 6 deletions

```diff
@@ -28,7 +28,8 @@
 
 class ModalityDataItems(ABC, Generic[_T, _I]):
     """
-    Represents data items for a modality in {class}`MultiModalDataItems`.
+    Represents data items for a modality in
+    [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems].
     """
 
     def __init__(self, data: _T, modality: str) -> None:
@@ -251,15 +252,15 @@ def __init__(self, data: Union[torch.Tensor, list[torch.Tensor]]) -> None:
 
 class MultiModalDataItems(UserDict[str, ModalityDataItems[Any, Any]]):
     """
-    As {data}`~vllm.multimodal.inputs.MultiModalDataDict`, but normalized
-    such that each entry corresponds to a list.
+    As [`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict], but
+    normalized such that each entry corresponds to a list.
     """
 
     def get_count(self, modality: str, *, strict: bool = True) -> int:
         """
         Get the number of data items belonging to a modality.
 
-        If `strict=False`, return `0` instead of raising {exc}`KeyError`
+        If `strict=False`, return `0` instead of raising [`KeyError`][]
         even if the modality is not found.
         """
         if modality not in self:
@@ -305,8 +306,8 @@ def get_items(
 
 class MultiModalDataParser:
     """
-    Parses {data}`~vllm.multimodal.inputs.MultiModalDataDict` into
-    {class}`MultiModalDataItems`.
+    Parses [`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict]
+    into [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems].
 
     Args:
         target_sr (float, optional): Enables automatic resampling of audio
```
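Finally, a hedged sketch of the `strict` flag documented in the `get_count` hunk above. The parser entry point `parse_mm_data` and the exact inputs it accepts are assumptions not shown in this diff:

```python
from PIL import Image

from vllm.multimodal.parse import MultiModalDataParser

# Assumed entry point: parse a MultiModalDataDict into MultiModalDataItems,
# normalizing each modality's entry to a list of items.
parser = MultiModalDataParser()
items = parser.parse_mm_data({"image": [Image.new("RGB", (8, 8))] * 2})

assert items.get_count("image") == 2
# strict=False: an absent modality yields 0 instead of raising KeyError.
assert items.get_count("video", strict=False) == 0
```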
