Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions vllm/entrypoints/chat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,12 @@
# pydantic needs the TypedDict from typing_extensions
from typing_extensions import Required, TypedDict

from vllm import envs
from vllm.config import ModelConfig
from vllm.logger import init_logger
from vllm.model_executor.models import SupportsMultiModal
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict, MultiModalUUIDDict
from vllm.multimodal.utils import MediaConnector
from vllm.multimodal.utils import MEDIA_CONNECTOR_REGISTRY, MediaConnector
from vllm.transformers_utils.chat_templates import get_chat_template_fallback_path
from vllm.transformers_utils.processor import cached_get_processor
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
Expand Down Expand Up @@ -805,11 +806,12 @@

self._tracker = tracker
multimodal_config = self._tracker.model_config.multimodal_config
media_io_kwargs = getattr(multimodal_config, "media_io_kwargs", None)

Check failure on line 809 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (F841)

vllm/entrypoints/chat_utils.py:809:9: F841 Local variable `media_io_kwargs` is assigned to but never used
self._connector = MediaConnector(
media_io_kwargs=media_io_kwargs,

self._connector: MediaConnector = MEDIA_CONNECTOR_REGISTRY.load(
envs.VLLM_MEDIA_CONNECTOR,
media_io_kwargs=self._tracker._model_config.media_io_kwargs,

Check failure on line 813 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

"ModelConfig" has no attribute "media_io_kwargs" [attr-defined]

Check failure on line 813 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

"ModelConfig" has no attribute "media_io_kwargs" [attr-defined]

Check failure on line 813 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

"ModelConfig" has no attribute "media_io_kwargs" [attr-defined]

Check failure on line 813 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

"ModelConfig" has no attribute "media_io_kwargs" [attr-defined]
allowed_local_media_path=tracker.allowed_local_media_path,
allowed_media_domains=tracker.allowed_media_domains,
)

@property
Expand Down Expand Up @@ -890,11 +892,11 @@

self._tracker = tracker
multimodal_config = self._tracker.model_config.multimodal_config
media_io_kwargs = getattr(multimodal_config, "media_io_kwargs", None)

Check failure on line 895 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (F841)

vllm/entrypoints/chat_utils.py:895:9: F841 Local variable `media_io_kwargs` is assigned to but never used
self._connector = MediaConnector(
media_io_kwargs=media_io_kwargs,
self._connector: MediaConnector = MEDIA_CONNECTOR_REGISTRY.load(
envs.VLLM_MEDIA_CONNECTOR,
media_io_kwargs=self._tracker._model_config.media_io_kwargs,

Check failure on line 898 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

"ModelConfig" has no attribute "media_io_kwargs" [attr-defined]

Check failure on line 898 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

"ModelConfig" has no attribute "media_io_kwargs" [attr-defined]

Check failure on line 898 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

"ModelConfig" has no attribute "media_io_kwargs" [attr-defined]

Check failure on line 898 in vllm/entrypoints/chat_utils.py

View workflow job for this annotation

GitHub Actions / pre-commit

"ModelConfig" has no attribute "media_io_kwargs" [attr-defined]
allowed_local_media_path=tracker.allowed_local_media_path,
allowed_media_domains=tracker.allowed_media_domains,
)

@property
Expand Down
9 changes: 9 additions & 0 deletions vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
VLLM_MEDIA_CONNECTOR: str = "http"
VLLM_MM_INPUT_CACHE_GIB: int = 4
VLLM_TARGET_DEVICE: str = "cuda"
VLLM_MAIN_CUDA_VERSION: str = "12.8"
Expand Down Expand Up @@ -704,6 +705,14 @@ def get_vllm_port() -> int | None:
"VLLM_VIDEO_LOADER_BACKEND": lambda: os.getenv(
"VLLM_VIDEO_LOADER_BACKEND", "opencv"
),
# Media connector implementation.
# - "http": Default connector that supports fetching media via HTTP.
#
# Custom implementations can be registered
# via `@MEDIA_CONNECTOR_REGISTRY.register("my_custom_media_connector")` and
# imported at runtime.
# If the named connector has not been registered, an AssertionError is raised.
"VLLM_MEDIA_CONNECTOR": lambda: os.getenv("VLLM_MEDIA_CONNECTOR", "http"),
# [DEPRECATED] Cache size (in GiB per process) for multimodal input cache
# Default is 4 GiB per API process + 4 GiB per engine core process
"VLLM_MM_INPUT_CACHE_GIB": lambda: int(os.getenv("VLLM_MM_INPUT_CACHE_GIB", "4")),
Expand Down
19 changes: 18 additions & 1 deletion vllm/multimodal/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Mapping
from dataclasses import dataclass
from typing import TYPE_CHECKING, Generic, Protocol, TypeVar
from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar

import torch.nn as nn

Expand Down Expand Up @@ -358,3 +358,20 @@ def get_encdec_max_encoder_len(self, model_config: "ModelConfig") -> int:

first_modality = next(iter(max_tokens))
return max_tokens[first_modality]


class ExtensionManager:
    """Name-keyed registry of classes that can be instantiated on demand.

    Classes are added with the ``register`` decorator and later constructed
    by name through ``load``.
    """

    def __init__(self) -> None:
        # Maps the registered name to the class stored under it.
        self.name2class: dict[str, type] = {}

    def register(self, name: str):
        """Return a class decorator that stores the class under ``name``.

        Registering a second class under an existing name replaces the
        earlier entry. The decorated class is returned unchanged.
        """

        def wrap(cls_to_register):
            self.name2class[name] = cls_to_register
            return cls_to_register

        return wrap

    def load(self, cls_name: str, *args, **kwargs) -> Any:
        """Instantiate the class registered under ``cls_name``.

        Args:
            cls_name: Name the class was registered with.
            *args: Positional arguments forwarded to the constructor.
            **kwargs: Keyword arguments forwarded to the constructor.

        Returns:
            A new instance of the registered class.

        Raises:
            AssertionError: If nothing is registered under ``cls_name``.
        """
        cls = self.name2class.get(cls_name)
        if cls is None:
            # Raised explicitly (rather than via ``assert``) so the check
            # still runs under ``python -O``; the exception type is unchanged.
            raise AssertionError(f"Extension class {cls_name} not found")
        # Unpack so the constructor receives the caller's arguments rather
        # than a (tuple, dict) pair passed as two positionals.
        return cls(*args, **kwargs)
4 changes: 4 additions & 0 deletions vllm/multimodal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import vllm.envs as envs
from vllm.connections import HTTPConnection, global_http_connection
from vllm.logger import init_logger
from vllm.multimodal.registry import ExtensionManager
from vllm.utils.jsontree import json_map_leaves

from .audio import AudioMediaIO
Expand Down Expand Up @@ -46,7 +47,10 @@

_M = TypeVar("_M")

MEDIA_CONNECTOR_REGISTRY = ExtensionManager()


@MEDIA_CONNECTOR_REGISTRY.register("http")
class MediaConnector:
def __init__(
self,
Expand Down
24 changes: 5 additions & 19 deletions vllm/multimodal/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from vllm import envs
from vllm.logger import init_logger
from vllm.multimodal.registry import ExtensionManager

from .base import MediaIO
from .image import ImageMediaIO
Expand Down Expand Up @@ -55,6 +56,9 @@ def sample_frames_from_video(frames: npt.NDArray, num_frames: int) -> npt.NDArra


class VideoLoader:
def __init__(self, *args, **kwargs) -> None:
pass

@classmethod
@abstractmethod
def load_bytes(
Expand All @@ -63,25 +67,7 @@ def load_bytes(
raise NotImplementedError


class VideoLoaderRegistry:
def __init__(self) -> None:
self.name2class: dict[str, type] = {}

def register(self, name: str):
def wrap(cls_to_register):
self.name2class[name] = cls_to_register
return cls_to_register

return wrap

@staticmethod
def load(cls_name: str) -> VideoLoader:
cls = VIDEO_LOADER_REGISTRY.name2class.get(cls_name)
assert cls is not None, f"VideoLoader class {cls_name} not found"
return cls()


VIDEO_LOADER_REGISTRY = VideoLoaderRegistry()
VIDEO_LOADER_REGISTRY = ExtensionManager()


@VIDEO_LOADER_REGISTRY.register("opencv")
Expand Down
Loading