diff --git a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
index 57db1f98baed..ed5129e1c820 100644
--- a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
+++ b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
@@ -27,7 +27,7 @@
 from vllm.model_executor.model_loader.tensorizer_loader import (
     BLACKLISTED_TENSORIZER_ARGS,
 )
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 from .conftest import DummyExecutor, assert_from_collective_rpc
 
diff --git a/tests/utils_/test_import_utils.py b/tests/utils_/test_import_utils.py
new file mode 100644
index 000000000000..d42685b3fc9a
--- /dev/null
+++ b/tests/utils_/test_import_utils.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+
+from vllm.utils.import_utils import PlaceholderModule
+
+
+def _raises_module_not_found():  # new pytest.raises context per call
+    return pytest.raises(ModuleNotFoundError, match="No module named")
+
+
+def test_placeholder_module_error_handling():
+    placeholder = PlaceholderModule("placeholder_1234")
+
+    with _raises_module_not_found():
+        int(placeholder)
+
+    with _raises_module_not_found():
+        placeholder()
+
+    with _raises_module_not_found():
+        _ = placeholder.some_attr
+
+    with _raises_module_not_found():
+        # `name` must not resolve to the name-mangled internal `__name` attribute
+        _ = placeholder.name
+
+    # Printing the placeholder or using it in an f-string must not raise
+    _ = repr(placeholder)
+    _ = str(placeholder)
+
+    # Creating an attribute placeholder is lazy; it only errors once used downstream
+    placeholder_attr = placeholder.placeholder_attr("attr")
+
+    with _raises_module_not_found():
+        int(placeholder_attr)
+
+    with _raises_module_not_found():
+        placeholder_attr()
+
+    with _raises_module_not_found():
+        _ = placeholder_attr.some_attr
+
+    with _raises_module_not_found():
+        # `module` must not resolve to the name-mangled internal `__module` attribute
+        _ = placeholder_attr.module
diff --git a/tests/utils_/test_utils.py b/tests/utils_/test_utils.py
index efc83c0a31b8..8b9411975e15 100644
--- a/tests/utils_/test_utils.py
+++ b/tests/utils_/test_utils.py
@@ -24,7 +24,6 @@
 from vllm.utils import (
     FlexibleArgumentParser,
     MemorySnapshot,
-    PlaceholderModule,
     bind_kv_cache,
     common_broadcastable_dtype,
     current_stream,
@@ -475,46 +474,6 @@ def test_common_broadcastable_dtype(dtypes, expected_result):
     assert common_broadcastable_dtype(dtypes) == expected_result
 
 
-def test_placeholder_module_error_handling():
-    placeholder = PlaceholderModule("placeholder_1234")
-
-    def build_ctx():
-        return pytest.raises(ModuleNotFoundError, match="No module named")
-
-    with build_ctx():
-        int(placeholder)
-
-    with build_ctx():
-        placeholder()
-
-    with build_ctx():
-        _ = placeholder.some_attr
-
-    with build_ctx():
-        # Test conflict with internal __name attribute
-        _ = placeholder.name
-
-    # OK to print the placeholder or use it in a f-string
-    _ = repr(placeholder)
-    _ = str(placeholder)
-
-    # No error yet; only error when it is used downstream
-    placeholder_attr = placeholder.placeholder_attr("attr")
-
-    with build_ctx():
-        int(placeholder_attr)
-
-    with build_ctx():
-        placeholder_attr()
-
-    with build_ctx():
-        _ = placeholder_attr.some_attr
-
-    with build_ctx():
-        # Test conflict with internal __module attribute
-        _ = placeholder_attr.module
-
-
 def test_model_specification(
     parser_with_config, cli_config_file, cli_config_file_with_model
 ):
diff --git a/tests/v1/attention/utils.py b/tests/v1/attention/utils.py
index 66a0169cbbd0..15ed7bdc835b 100644
--- a/tests/v1/attention/utils.py
+++ b/tests/v1/attention/utils.py
@@ -20,7 +20,7 @@
     VllmConfig,
 )
 from vllm.config.model import ModelDType
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.attention.backends.utils import (
     AttentionMetadataBuilder,
     CommonAttentionMetadata,
diff --git a/vllm/assets/audio.py b/vllm/assets/audio.py
index 61c2dbf55fe3..b527ffcf9b18 100644
--- a/vllm/assets/audio.py
+++ b/vllm/assets/audio.py
@@ -8,7 +8,7 @@
 
 import numpy.typing as npt
 
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 from .base import VLLM_S3_BUCKET_URL, get_vllm_public_assets
 
diff --git a/vllm/assets/video.py b/vllm/assets/video.py
index 277c8ea1bf0d..8818b5997004 100644
--- a/vllm/assets/video.py
+++ b/vllm/assets/video.py
@@ -10,7 +10,7 @@
 from huggingface_hub import hf_hub_download
 from PIL import Image
 
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 from .base import get_cache_dir
 
diff --git a/vllm/attention/backends/registry.py b/vllm/attention/backends/registry.py
index dc6de483d6ae..05d0159d0861 100644
--- a/vllm/attention/backends/registry.py
+++ b/vllm/attention/backends/registry.py
@@ -4,7 +4,7 @@
 
 import enum
 
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 
 class _Backend(enum.Enum):
diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py
index 187274133904..9890d8d80cba 100644
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -13,7 +13,8 @@
 from vllm.attention.backends.abstract import AttentionBackend
 from vllm.attention.backends.registry import _Backend, backend_name_to_enum
 from vllm.logger import init_logger
-from vllm.utils import STR_BACKEND_ENV_VAR, resolve_obj_by_qualname
+from vllm.utils import STR_BACKEND_ENV_VAR
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 logger = init_logger(__name__)
 
diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py
index 20a15bbc31e3..652e4b122373 100644
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -39,7 +39,7 @@
 from vllm.multimodal import MultiModalDataDict
 from vllm.multimodal.image import convert_image_mode
 from vllm.transformers_utils.tokenizer import AnyTokenizer
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 try:
     from datasets import load_dataset
diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index 91be7e85af51..9e6053bc3028 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -24,7 +24,8 @@
 from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 from .caching import VllmSerializableFunction
 from .compiler_interface import (
diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
index 20d4681e2c78..811cbef4afab 100644
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -21,7 +21,8 @@
 from vllm.config import CompilationMode, VllmConfig, set_current_vllm_config
 from vllm.logger import init_logger
 from vllm.sequence import IntermediateTensors
-from vllm.utils import resolve_obj_by_qualname, supports_dynamo
+from vllm.utils import supports_dynamo
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 from .monitor import start_monitoring_torch_compile
 
diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index a34fb0bf920c..ff43e4e826df 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -16,7 +16,8 @@
 from vllm.config.utils import config
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
diff --git a/vllm/config/model.py b/vllm/config/model.py
index ebad9bfb9c90..6c723624fbc2 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -41,7 +41,8 @@
 )
 from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
 from vllm.transformers_utils.utils import maybe_model_redirect
-from vllm.utils import LayerBlockType, LazyLoader, common_broadcastable_dtype
+from vllm.utils import LayerBlockType, common_broadcastable_dtype
+from vllm.utils.import_utils import LazyLoader
 
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py
index aa254a9b35f6..a5bc4d1fa3c0 100644
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@@ -13,7 +13,7 @@
 from vllm.config.parallel import ParallelConfig
 from vllm.config.utils import config
 from vllm.logger import init_logger
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 67a8c6f7c053..132fb9049163 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -52,9 +52,9 @@
 from vllm.utils import (
     direct_register_custom_op,
     get_distributed_init_method,
-    resolve_obj_by_qualname,
     supports_custom_op,
 )
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 
 @dataclass
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 5b8a118280da..e73752b9d50b 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -81,7 +81,8 @@
     SamplingParams,
     StructuredOutputsParams,
 )
-from vllm.utils import random_uuid, resolve_obj_by_qualname
+from vllm.utils import random_uuid
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 EMBED_DTYPE_TO_TORCH_DTYPE = {
     "float32": torch.float32,
diff --git a/vllm/entrypoints/openai/speech_to_text.py b/vllm/entrypoints/openai/speech_to_text.py
index e012f43260c2..46139642c50c 100644
--- a/vllm/entrypoints/openai/speech_to_text.py
+++ b/vllm/entrypoints/openai/speech_to_text.py
@@ -32,7 +32,7 @@
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 try:
     import librosa
diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
index c7363e442cdd..34ec9f8110e6 100644
--- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@@ -12,8 +12,8 @@
 )
 from vllm.logger import init_logger
 from vllm.transformers_utils.tokenizer import AnyTokenizer
-from vllm.utils import import_from_path
 from vllm.utils.collections import is_list_of
+from vllm.utils.import_utils import import_from_path
 
 logger = init_logger(__name__)
 
diff --git a/vllm/lora/punica_wrapper/punica_selector.py b/vllm/lora/punica_wrapper/punica_selector.py
index c017721803fe..d8763e913e3a 100644
--- a/vllm/lora/punica_wrapper/punica_selector.py
+++ b/vllm/lora/punica_wrapper/punica_selector.py
@@ -3,7 +3,7 @@
 
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 from .punica_base import PunicaWrapperBase
 
diff --git a/vllm/model_executor/layers/pooler.py b/vllm/model_executor/layers/pooler.py
index 84e176f0ea89..a8c66315684e 100644
--- a/vllm/model_executor/layers/pooler.py
+++ b/vllm/model_executor/layers/pooler.py
@@ -17,7 +17,7 @@
 from vllm.model_executor.models.adapters import _load_st_projector
 from vllm.pooling_params import PoolingParams
 from vllm.tasks import PoolingTask
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.outputs import PoolerOutput
 from vllm.v1.pool.metadata import PoolingCursor, PoolingMetadata
 
diff --git a/vllm/model_executor/model_loader/tensorizer.py b/vllm/model_executor/model_loader/tensorizer.py
index 4ebfba65ac80..2890a2c6d702 100644
--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -26,7 +26,8 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.platforms import current_platform
-from vllm.utils import FlexibleArgumentParser, PlaceholderModule
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.import_utils import PlaceholderModule
 
 if TYPE_CHECKING:
     from vllm.engine.arg_utils import EngineArgs
diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py
index c2d68029f4c7..a16ce3db3003 100644
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -34,7 +34,7 @@
     get_quantization_config,
 )
 from vllm.platforms import current_platform
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 try:
     from runai_model_streamer import SafetensorsStreamer
diff --git a/vllm/multimodal/audio.py b/vllm/multimodal/audio.py
index a483837d4fb6..53052ddc6343 100644
--- a/vllm/multimodal/audio.py
+++ b/vllm/multimodal/audio.py
@@ -8,7 +8,7 @@
 import numpy as np
 import numpy.typing as npt
 
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 from .base import MediaIO
 
diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py
index 9eab33b45e8a..e3a0fa562e79 100644
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@@ -22,8 +22,8 @@
 import numpy as np
 from typing_extensions import NotRequired, TypeVar, deprecated
 
-from vllm.utils import LazyLoader
 from vllm.utils.collections import full_groupby, is_list_of
+from vllm.utils.import_utils import LazyLoader
 from vllm.utils.jsontree import json_map_leaves
 
 if TYPE_CHECKING:
diff --git a/vllm/multimodal/parse.py b/vllm/multimodal/parse.py
index 71e577f0c0ad..8cb402e951da 100644
--- a/vllm/multimodal/parse.py
+++ b/vllm/multimodal/parse.py
@@ -19,8 +19,8 @@
 import torch
 from typing_extensions import assert_never
 
-from vllm.utils import LazyLoader
 from vllm.utils.collections import is_list_of
+from vllm.utils.import_utils import LazyLoader
 
 from .audio import AudioResampler
 from .inputs import (
diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index b9140b4fe676..8942a3206e6d 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -7,7 +7,8 @@
 
 from vllm import envs
 from vllm.plugins import PLATFORM_PLUGINS_GROUP, load_plugins_by_group
-from vllm.utils import resolve_obj_by_qualname, supports_xccl
+from vllm.utils import supports_xccl
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 from .interface import CpuArchEnum, Platform, PlatformEnum
 
diff --git a/vllm/plugins/io_processors/__init__.py b/vllm/plugins/io_processors/__init__.py
index c7b01ae34144..b3a3b548781e 100644
--- a/vllm/plugins/io_processors/__init__.py
+++ b/vllm/plugins/io_processors/__init__.py
@@ -6,7 +6,7 @@
 from vllm.config import VllmConfig
 from vllm.plugins import IO_PROCESSOR_PLUGINS_GROUP, load_plugins_by_group
 from vllm.plugins.io_processors.interface import IOProcessor
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 logger = logging.getLogger(__name__)
 
diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py
index cbebca09e7b8..3a595a3076eb 100644
--- a/vllm/reasoning/abs_reasoning_parsers.py
+++ b/vllm/reasoning/abs_reasoning_parsers.py
@@ -8,8 +8,8 @@
 from typing import TYPE_CHECKING, Any
 
 from vllm.logger import init_logger
-from vllm.utils import import_from_path
 from vllm.utils.collections import is_list_of
+from vllm.utils.import_utils import import_from_path
 
 if TYPE_CHECKING:
     from vllm.entrypoints.openai.protocol import (
diff --git a/vllm/transformers_utils/runai_utils.py b/vllm/transformers_utils/runai_utils.py
index 3f61a22adeb9..eac4294bb59c 100644
--- a/vllm/transformers_utils/runai_utils.py
+++ b/vllm/transformers_utils/runai_utils.py
@@ -9,7 +9,7 @@
 from vllm import envs
 from vllm.assets.base import get_cache_dir
 from vllm.logger import init_logger
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 logger = init_logger(__name__)
 
diff --git a/vllm/transformers_utils/s3_utils.py b/vllm/transformers_utils/s3_utils.py
index c580361f92f9..a5a3af6538b8 100644
--- a/vllm/transformers_utils/s3_utils.py
+++ b/vllm/transformers_utils/s3_utils.py
@@ -4,7 +4,7 @@
 import fnmatch
 from typing import TYPE_CHECKING, Optional
 
-from vllm.utils import PlaceholderModule
+from vllm.utils.import_utils import PlaceholderModule
 
 if TYPE_CHECKING:
     from botocore.client import BaseClient
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 94cf6d0ff153..c6fc4134d28a 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -8,8 +8,6 @@
 import getpass
 import hashlib
 import importlib
-import importlib.metadata
-import importlib.util
 import inspect
 import ipaddress
 import json
@@ -25,7 +23,6 @@
 import threading
 import time
 import traceback
-import types
 import uuid
 import warnings
 import weakref
@@ -68,7 +65,6 @@
 from packaging import version
 from packaging.version import Version
 from torch.library import Library
-from typing_extensions import Never
 
 import vllm.envs as envs
 from vllm.logger import enable_trace_function_call, init_logger
@@ -801,8 +797,6 @@ def find_nccl_include_paths() -> list[str] | None:
         paths.append(inc)
 
     try:
-        import importlib.util
-
         spec = importlib.util.find_spec("nvidia.nccl")
         if spec and getattr(spec, "submodule_search_locations", None):
             for loc in spec.submodule_search_locations:
@@ -1560,253 +1554,6 @@ def get_cuda_view_from_cpu_tensor(cpu_tensor: torch.Tensor) -> torch.Tensor:
     return torch.ops._C.get_cuda_view_from_cpu_tensor(cpu_tensor)
 
 
-def import_from_path(module_name: str, file_path: str | os.PathLike):
-    """
-    Import a Python file according to its file path.
-
-    Based on the official recipe:
-    https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
-    """
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ModuleNotFoundError(f"No module named '{module_name}'")
-
-    assert spec.loader is not None
-
-    module = importlib.util.module_from_spec(spec)
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)
-    return module
-
-
-@cache
-def get_vllm_optional_dependencies():
-    metadata = importlib.metadata.metadata("vllm")
-    requirements = metadata.get_all("Requires-Dist", [])
-    extras = metadata.get_all("Provides-Extra", [])
-
-    return {
-        extra: [
-            re.split(r";|>=|<=|==", req)[0]
-            for req in requirements
-            if req.endswith(f'extra == "{extra}"')
-        ]
-        for extra in extras
-    }
-
-
-class _PlaceholderBase:
-    """
-    Disallows downstream usage of placeholder modules.
-
-    We need to explicitly override each dunder method because
-    [`__getattr__`][vllm.utils._PlaceholderBase.__getattr__]
-    is not called when they are accessed.
-
-    Info:
-        [Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
-    """
-
-    def __getattr__(self, key: str) -> Never:
-        """
-        The main class should implement this to throw an error
-        for attribute accesses representing downstream usage.
-        """
-        raise NotImplementedError
-
-    # [Basic customization]
-
-    def __lt__(self, other: object):
-        return self.__getattr__("__lt__")
-
-    def __le__(self, other: object):
-        return self.__getattr__("__le__")
-
-    def __eq__(self, other: object):
-        return self.__getattr__("__eq__")
-
-    def __ne__(self, other: object):
-        return self.__getattr__("__ne__")
-
-    def __gt__(self, other: object):
-        return self.__getattr__("__gt__")
-
-    def __ge__(self, other: object):
-        return self.__getattr__("__ge__")
-
-    def __hash__(self):
-        return self.__getattr__("__hash__")
-
-    def __bool__(self):
-        return self.__getattr__("__bool__")
-
-    # [Callable objects]
-
-    def __call__(self, *args: object, **kwargs: object):
-        return self.__getattr__("__call__")
-
-    # [Container types]
-
-    def __len__(self):
-        return self.__getattr__("__len__")
-
-    def __getitem__(self, key: object):
-        return self.__getattr__("__getitem__")
-
-    def __setitem__(self, key: object, value: object):
-        return self.__getattr__("__setitem__")
-
-    def __delitem__(self, key: object):
-        return self.__getattr__("__delitem__")
-
-    # __missing__ is optional according to __getitem__ specification,
-    # so it is skipped
-
-    # __iter__ and __reversed__ have a default implementation
-    # based on __len__ and __getitem__, so they are skipped.
-
-    # [Numeric Types]
-
-    def __add__(self, other: object):
-        return self.__getattr__("__add__")
-
-    def __sub__(self, other: object):
-        return self.__getattr__("__sub__")
-
-    def __mul__(self, other: object):
-        return self.__getattr__("__mul__")
-
-    def __matmul__(self, other: object):
-        return self.__getattr__("__matmul__")
-
-    def __truediv__(self, other: object):
-        return self.__getattr__("__truediv__")
-
-    def __floordiv__(self, other: object):
-        return self.__getattr__("__floordiv__")
-
-    def __mod__(self, other: object):
-        return self.__getattr__("__mod__")
-
-    def __divmod__(self, other: object):
-        return self.__getattr__("__divmod__")
-
-    def __pow__(self, other: object, modulo: object = ...):
-        return self.__getattr__("__pow__")
-
-    def __lshift__(self, other: object):
-        return self.__getattr__("__lshift__")
-
-    def __rshift__(self, other: object):
-        return self.__getattr__("__rshift__")
-
-    def __and__(self, other: object):
-        return self.__getattr__("__and__")
-
-    def __xor__(self, other: object):
-        return self.__getattr__("__xor__")
-
-    def __or__(self, other: object):
-        return self.__getattr__("__or__")
-
-    # r* and i* methods have lower priority than
-    # the methods for left operand so they are skipped
-
-    def __neg__(self):
-        return self.__getattr__("__neg__")
-
-    def __pos__(self):
-        return self.__getattr__("__pos__")
-
-    def __abs__(self):
-        return self.__getattr__("__abs__")
-
-    def __invert__(self):
-        return self.__getattr__("__invert__")
-
-    # __complex__, __int__ and __float__ have a default implementation
-    # based on __index__, so they are skipped.
-
-    def __index__(self):
-        return self.__getattr__("__index__")
-
-    def __round__(self, ndigits: object = ...):
-        return self.__getattr__("__round__")
-
-    def __trunc__(self):
-        return self.__getattr__("__trunc__")
-
-    def __floor__(self):
-        return self.__getattr__("__floor__")
-
-    def __ceil__(self):
-        return self.__getattr__("__ceil__")
-
-    # [Context managers]
-
-    def __enter__(self):
-        return self.__getattr__("__enter__")
-
-    def __exit__(self, *args: object, **kwargs: object):
-        return self.__getattr__("__exit__")
-
-
-class PlaceholderModule(_PlaceholderBase):
-    """
-    A placeholder object to use when a module does not exist.
-
-    This enables more informative errors when trying to access attributes
-    of a module that does not exist.
-    """
-
-    def __init__(self, name: str) -> None:
-        super().__init__()
-
-        # Apply name mangling to avoid conflicting with module attributes
-        self.__name = name
-
-    def placeholder_attr(self, attr_path: str):
-        return _PlaceholderModuleAttr(self, attr_path)
-
-    def __getattr__(self, key: str):
-        name = self.__name
-
-        try:
-            importlib.import_module(name)
-        except ImportError as exc:
-            for extra, names in get_vllm_optional_dependencies().items():
-                if name in names:
-                    msg = f"Please install vllm[{extra}] for {extra} support"
-                    raise ImportError(msg) from exc
-
-            raise exc
-
-        raise AssertionError(
-            "PlaceholderModule should not be used "
-            "when the original module can be imported"
-        )
-
-
-class _PlaceholderModuleAttr(_PlaceholderBase):
-    def __init__(self, module: PlaceholderModule, attr_path: str) -> None:
-        super().__init__()
-
-        # Apply name mangling to avoid conflicting with module attributes
-        self.__module = module
-        self.__attr_path = attr_path
-
-    def placeholder_attr(self, attr_path: str):
-        return _PlaceholderModuleAttr(self.__module, f"{self.__attr_path}.{attr_path}")
-
-    def __getattr__(self, key: str):
-        getattr(self.__module, f"{self.__attr_path}.{key}")
-
-        raise AssertionError(
-            "PlaceholderModule should not be used "
-            "when the original module can be imported"
-        )
-
-
 # create a library to hold the custom op
 vllm_lib = Library("vllm", "FRAGMENT")  # noqa
 
@@ -1871,15 +1618,6 @@ def direct_register_custom_op(
         my_lib._register_fake(op_name, fake_impl)
 
 
-def resolve_obj_by_qualname(qualname: str) -> Any:
-    """
-    Resolve an object by its fully-qualified class name.
-    """
-    module_name, obj_name = qualname.rsplit(".", 1)
-    module = importlib.import_module(module_name)
-    return getattr(module, obj_name)
-
-
 def kill_process_tree(pid: int):
     """
     Kills all descendant processes of the given pid by sending SIGKILL.
@@ -2427,57 +2165,6 @@ def wrapped_init(self, *args, **kwargs) -> None:
     return cls
 
 
-class LazyLoader(types.ModuleType):
-    """
-    LazyLoader module borrowed from Tensorflow
-    https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py
-    with an addition of "module caching".
-
-    Lazily import a module, mainly to avoid pulling in large dependencies.
-    Modules such as `xgrammar` might do additional side effects, so we
-    only want to use this when it is needed, delaying all eager effects
-    """
-
-    def __init__(
-        self,
-        local_name: str,
-        parent_module_globals: dict[str, Any],
-        name: str,
-    ):
-        self._local_name = local_name
-        self._parent_module_globals = parent_module_globals
-        self._module: types.ModuleType | None = None
-
-        super().__init__(str(name))
-
-    def _load(self) -> types.ModuleType:
-        # Import the target module and insert it into the parent's namespace
-        try:
-            module = importlib.import_module(self.__name__)
-            self._parent_module_globals[self._local_name] = module
-            # The additional add to sys.modules
-            # ensures library is actually loaded.
-            sys.modules[self._local_name] = module
-        except ModuleNotFoundError as err:
-            raise err from None
-
-        # Update this object's dict so that if someone keeps a
-        # reference to the LazyLoader, lookups are efficient
-        # (__getattr__ is only called on lookups that fail).
-        self.__dict__.update(module.__dict__)
-        return module
-
-    def __getattr__(self, item: Any) -> Any:
-        if self._module is None:
-            self._module = self._load()
-        return getattr(self._module, item)
-
-    def __dir__(self) -> list[str]:
-        if self._module is None:
-            self._module = self._load()
-        return dir(self._module)
-
-
 @contextlib.contextmanager
 def cprofile_context(save_file: str | None = None):
     """Run a cprofile
diff --git a/vllm/utils/import_utils.py b/vllm/utils/import_utils.py
new file mode 100644
index 000000000000..fdc3d356a7eb
--- /dev/null
+++ b/vllm/utils/import_utils.py
@@ -0,0 +1,325 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Contains helpers related to importing modules.
+
+This is similar in concept to the `importlib` module.
+"""
+
+import importlib.metadata
+import importlib.util
+import os
+import sys
+from functools import cache
+from types import ModuleType
+from typing import Any
+
+import regex as re
+from typing_extensions import Never
+
+
+def import_from_path(module_name: str, file_path: str | os.PathLike):
+    """
+    Import a Python file according to its file path.
+
+    Based on the official recipe:
+    https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
+    """
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ModuleNotFoundError(f"No module named {module_name!r}")
+
+    assert spec.loader is not None
+
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+def resolve_obj_by_qualname(qualname: str) -> Any:
+    """
+    Resolve an object by its fully-qualified name (`package.module.attr`).
+    """
+    module_name, obj_name = qualname.rsplit(".", 1)
+    module = importlib.import_module(module_name)
+    return getattr(module, obj_name)
+
+
+@cache
+def get_vllm_optional_dependencies():
+    metadata = importlib.metadata.metadata("vllm")
+    requirements = metadata.get_all("Requires-Dist", [])
+    extras = metadata.get_all("Provides-Extra", [])
+
+    return {
+        extra: [
+            re.split(r";|>=|<=|==", req)[0]
+            for req in requirements
+            if req.endswith(f'extra == "{extra}"')
+        ]
+        for extra in extras
+    }
+
+
+class _PlaceholderBase:
+    """
+    Disallows downstream usage of placeholder modules.
+
+    We need to explicitly override each dunder method because
+    [`__getattr__`][vllm.utils.import_utils._PlaceholderBase.__getattr__]
+    is not called when they are accessed.
+
+    Info:
+        [Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
+    """
+
+    def __getattr__(self, key: str) -> Never:
+        """
+        The main class should implement this to throw an error
+        for attribute accesses representing downstream usage.
+        """
+        raise NotImplementedError
+
+    # [Basic customization]
+
+    def __lt__(self, other: object):
+        return self.__getattr__("__lt__")
+
+    def __le__(self, other: object):
+        return self.__getattr__("__le__")
+
+    def __eq__(self, other: object):
+        return self.__getattr__("__eq__")
+
+    def __ne__(self, other: object):
+        return self.__getattr__("__ne__")
+
+    def __gt__(self, other: object):
+        return self.__getattr__("__gt__")
+
+    def __ge__(self, other: object):
+        return self.__getattr__("__ge__")
+
+    def __hash__(self):
+        return self.__getattr__("__hash__")
+
+    def __bool__(self):
+        return self.__getattr__("__bool__")
+
+    # [Callable objects]
+
+    def __call__(self, *args: object, **kwargs: object):
+        return self.__getattr__("__call__")
+
+    # [Container types]
+
+    def __len__(self):
+        return self.__getattr__("__len__")
+
+    def __getitem__(self, key: object):
+        return self.__getattr__("__getitem__")
+
+    def __setitem__(self, key: object, value: object):
+        return self.__getattr__("__setitem__")
+
+    def __delitem__(self, key: object):
+        return self.__getattr__("__delitem__")
+
+    # __missing__ is optional according to __getitem__ specification,
+    # so it is skipped
+
+    # __iter__ and __reversed__ have a default implementation
+    # based on __len__ and __getitem__, so they are skipped.
+
+    # [Numeric Types]
+
+    def __add__(self, other: object):
+        return self.__getattr__("__add__")
+
+    def __sub__(self, other: object):
+        return self.__getattr__("__sub__")
+
+    def __mul__(self, other: object):
+        return self.__getattr__("__mul__")
+
+    def __matmul__(self, other: object):
+        return self.__getattr__("__matmul__")
+
+    def __truediv__(self, other: object):
+        return self.__getattr__("__truediv__")
+
+    def __floordiv__(self, other: object):
+        return self.__getattr__("__floordiv__")
+
+    def __mod__(self, other: object):
+        return self.__getattr__("__mod__")
+
+    def __divmod__(self, other: object):
+        return self.__getattr__("__divmod__")
+
+    def __pow__(self, other: object, modulo: object = ...):
+        return self.__getattr__("__pow__")
+
+    def __lshift__(self, other: object):
+        return self.__getattr__("__lshift__")
+
+    def __rshift__(self, other: object):
+        return self.__getattr__("__rshift__")
+
+    def __and__(self, other: object):
+        return self.__getattr__("__and__")
+
+    def __xor__(self, other: object):
+        return self.__getattr__("__xor__")
+
+    def __or__(self, other: object):
+        return self.__getattr__("__or__")
+
+    # r* and i* methods have lower priority than
+    # the methods for left operand so they are skipped
+
+    def __neg__(self):
+        return self.__getattr__("__neg__")
+
+    def __pos__(self):
+        return self.__getattr__("__pos__")
+
+    def __abs__(self):
+        return self.__getattr__("__abs__")
+
+    def __invert__(self):
+        return self.__getattr__("__invert__")
+
+    # __complex__, __int__ and __float__ have a default implementation
+    # based on __index__, so they are skipped.
+
+    def __index__(self):
+        return self.__getattr__("__index__")
+
+    def __round__(self, ndigits: object = ...):
+        return self.__getattr__("__round__")
+
+    def __trunc__(self):
+        return self.__getattr__("__trunc__")
+
+    def __floor__(self):
+        return self.__getattr__("__floor__")
+
+    def __ceil__(self):
+        return self.__getattr__("__ceil__")
+
+    # [Context managers]
+
+    def __enter__(self):
+        return self.__getattr__("__enter__")
+
+    def __exit__(self, *args: object, **kwargs: object):
+        return self.__getattr__("__exit__")
+
+
+class PlaceholderModule(_PlaceholderBase):
+    """
+    A placeholder object to use when a module does not exist.
+
+    This enables more informative errors when trying to access attributes
+    of a module that does not exist.
+    """
+
+    def __init__(self, name: str) -> None:
+        super().__init__()
+
+        # Apply name mangling to avoid conflicting with module attributes
+        self.__name = name
+
+    def placeholder_attr(self, attr_path: str):
+        return _PlaceholderModuleAttr(self, attr_path)
+
+    def __getattr__(self, key: str) -> Never:
+        name = self.__name
+
+        try:
+            importlib.import_module(name)
+        except ImportError as exc:
+            for extra, names in get_vllm_optional_dependencies().items():
+                if name in names:
+                    msg = f"Please install vllm[{extra}] for {extra} support"
+                    raise ImportError(msg) from exc
+
+            raise exc
+
+        raise AssertionError(
+            "PlaceholderModule should not be used "
+            "when the original module can be imported"
+        )
+
+
+class _PlaceholderModuleAttr(_PlaceholderBase):
+    def __init__(self, module: PlaceholderModule, attr_path: str) -> None:
+        super().__init__()
+
+        # Apply name mangling to avoid conflicting with module attributes
+        self.__module = module
+        self.__attr_path = attr_path
+
+    def placeholder_attr(self, attr_path: str):
+        return _PlaceholderModuleAttr(self.__module, f"{self.__attr_path}.{attr_path}")
+
+    def __getattr__(self, key: str) -> Never:
+        getattr(self.__module, f"{self.__attr_path}.{key}")
+
+        raise AssertionError(
+            "PlaceholderModule should not be used "
+            "when the original module can be imported"
+        )
+
+
+class LazyLoader(ModuleType):
+    """
+    `LazyLoader` module borrowed from
+    [Tensorflow](https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py)
+    with an addition of "module caching".
+
+    Lazily import a module, mainly to avoid pulling in large dependencies.
+    Modules such as `xgrammar` might do additional side effects, so we
+    only want to use this when it is needed, delaying all eager effects.
+    """
+
+    def __init__(
+        self,
+        local_name: str,
+        parent_module_globals: dict[str, Any],
+        name: str,
+    ):
+        self._local_name = local_name
+        self._parent_module_globals = parent_module_globals
+        self._module: ModuleType | None = None
+
+        super().__init__(str(name))
+
+    def _load(self) -> ModuleType:
+        # Import the target module and insert it into the parent's namespace
+        try:
+            module = importlib.import_module(self.__name__)
+            self._parent_module_globals[self._local_name] = module
+            # The additional add to sys.modules
+            # ensures library is actually loaded.
+            sys.modules[self._local_name] = module
+        except ModuleNotFoundError as err:
+            raise err from None
+
+        # Update this object's dict so that if someone keeps a
+        # reference to the LazyLoader, lookups are efficient
+        # (__getattr__ is only called on lookups that fail).
+        self.__dict__.update(module.__dict__)
+        return module
+
+    def __getattr__(self, item: Any) -> Any:
+        if self._module is None:
+            self._module = self._load()
+        return getattr(self._module, item)
+
+    def __dir__(self) -> list[str]:
+        if self._module is None:
+            self._module = self._load()
+        return dir(self._module)
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index a21f0715704a..0ca60ce5cf9a 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -32,10 +32,10 @@
     decorate_logs,
     get_hash_fn_by_name,
     make_zmq_socket,
-    resolve_obj_by_qualname,
     set_process_title,
 )
 from vllm.utils.gc_utils import maybe_attach_gc_debug_callback
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.core.kv_cache_utils import (
     BlockHash,
     generate_scheduler_kv_cache_config,
diff --git a/vllm/v1/executor/abstract.py b/vllm/v1/executor/abstract.py
index 53617645f52c..2a7e052f1329 100644
--- a/vllm/v1/executor/abstract.py
+++ b/vllm/v1/executor/abstract.py
@@ -14,7 +14,7 @@
     ExecutorWithExternalLauncher as ExecutorWithExternalLauncherV0,
 )
 from vllm.executor.uniproc_executor import UniProcExecutor as UniProcExecutorV0  # noqa
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
 from vllm.v1.outputs import DraftTokenIds, ModelRunnerOutput
diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py
index 8d7f4b5d6896..4fb26ab1cefa 100644
--- a/vllm/v1/structured_output/__init__.py
+++ b/vllm/v1/structured_output/__init__.py
@@ -8,7 +8,7 @@
 from vllm.logger import init_logger
 from vllm.reasoning import ReasoningParserManager
 from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_guidance import GuidanceBackend
 from vllm.v1.structured_output.backend_types import (
     StructuredOutputBackend,
diff --git a/vllm/v1/structured_output/backend_guidance.py b/vllm/v1/structured_output/backend_guidance.py
index 8e75b99f8481..00a625e103bd 100644
--- a/vllm/v1/structured_output/backend_guidance.py
+++ b/vllm/v1/structured_output/backend_guidance.py
@@ -11,7 +11,7 @@
 
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
     StructuredOutputBackend,
     StructuredOutputGrammar,
diff --git a/vllm/v1/structured_output/backend_lm_format_enforcer.py b/vllm/v1/structured_output/backend_lm_format_enforcer.py
index c20e976d8487..150c57feda0f 100644
--- a/vllm/v1/structured_output/backend_lm_format_enforcer.py
+++ b/vllm/v1/structured_output/backend_lm_format_enforcer.py
@@ -10,7 +10,7 @@
 from transformers import PreTrainedTokenizerBase
 
 from vllm.sampling_params import SamplingParams
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
     StructuredOutputBackend,
     StructuredOutputGrammar,
diff --git a/vllm/v1/structured_output/backend_outlines.py b/vllm/v1/structured_output/backend_outlines.py
index 2355f8ab8f89..1e01788076d5 100644
--- a/vllm/v1/structured_output/backend_outlines.py
+++ b/vllm/v1/structured_output/backend_outlines.py
@@ -12,7 +12,7 @@
 from regex import escape as regex_escape
 
 from vllm.sampling_params import SamplingParams
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
     StructuredOutputBackend,
     StructuredOutputGrammar,
diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py
index 1b430157560c..074781fb6604 100644
--- a/vllm/v1/structured_output/backend_xgrammar.py
+++ b/vllm/v1/structured_output/backend_xgrammar.py
@@ -11,7 +11,7 @@
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
     StructuredOutputBackend,
     StructuredOutputGrammar,
diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py
index 4b793b9a72fd..e20ad1a6ec31 100644
--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@@ -13,7 +13,7 @@
 
 import vllm.envs as envs
 from vllm.logger import init_logger
-from vllm.utils import LazyLoader
+from vllm.utils.import_utils import LazyLoader
 
 if TYPE_CHECKING:
     import outlines_core as oc
diff --git a/vllm/v1/worker/worker_base.py b/vllm/v1/worker/worker_base.py
index 85436b443f7c..9319918b84be 100644
--- a/vllm/v1/worker/worker_base.py
+++ b/vllm/v1/worker/worker_base.py
@@ -15,11 +15,11 @@
 from vllm.multimodal.cache import worker_receiver_cache_from_config
 from vllm.utils import (
     enable_trace_function_call_for_thread,
-    resolve_obj_by_qualname,
     run_method,
     update_environment_variables,
     warn_for_unimplemented_methods,
 )
+from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.kv_cache_interface import KVCacheSpec
 
 if TYPE_CHECKING: