65 changes: 49 additions & 16 deletions .buildkite/test-pipeline.yaml
@@ -50,19 +50,28 @@ steps:
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/multimodal
- tests/utils_
commands:
- pytest -v -s -m 'not cpu_test' multimodal
- pytest -v -s utils_

- label: Async Engine, Inputs, Utils, Worker Test (CPU) # 4 mins
timeout_in_minutes: 10
source_file_dependencies:
- vllm/
- tests/test_inputs.py
- tests/test_outputs.py
- tests/multimodal
- tests/utils_
- tests/standalone_tests/lazy_imports.py
- tests/transformers_utils
no_gpu: true
commands:
- python3 standalone_tests/lazy_imports.py
- pytest -v -s test_inputs.py
- pytest -v -s test_outputs.py
- pytest -v -s multimodal
- pytest -v -s utils_ # Utils
- pytest -v -s transformers_utils # transformers_utils
- pytest -v -s -m 'cpu_test' multimodal
- pytest -v -s transformers_utils

- label: Python-only Installation Test # 10min
timeout_in_minutes: 20
@@ -287,23 +296,34 @@ steps:
- tests/v1
commands:
# split the test to avoid interference
- pytest -v -s v1/core
- pytest -v -s v1/executor
- pytest -v -s v1/kv_offload
- pytest -v -s v1/sample
- pytest -v -s v1/logits_processors
- pytest -v -s v1/worker
- pytest -v -s v1/structured_output
- pytest -v -s v1/spec_decode
- pytest -v -s v1/kv_connector/unit
- pytest -v -s v1/metrics
- pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
- pytest -v -s -m 'not cpu_test' v1/metrics
- pytest -v -s v1/test_oracle.py
- pytest -v -s v1/test_request.py
- pytest -v -s v1/test_serial_utils.py
# Integration test for streaming correctness (requires special branch).
- pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
- pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine

- label: V1 Test others (CPU) # 5 mins
source_file_dependencies:
- vllm/
- tests/v1
no_gpu: true
commands:
# split the test to avoid interference
- pytest -v -s v1/core
- pytest -v -s v1/structured_output
- pytest -v -s v1/test_serial_utils.py
- pytest -v -s -m 'cpu_test' v1/kv_connector/unit
- pytest -v -s -m 'cpu_test' v1/metrics


- label: Examples Test # 30min
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
@@ -533,10 +553,17 @@ steps:
source_file_dependencies:
- vllm/
- tests/tool_use
- tests/mistral_tool_use
commands:
- pytest -v -s tool_use
- pytest -v -s mistral_tool_use
- pytest -v -s -m 'not cpu_test' tool_use

- label: OpenAI-Compatible Tool Use (CPU) # 5 mins
timeout_in_minutes: 10
source_file_dependencies:
- vllm/
- tests/tool_use
no_gpu: true
commands:
- pytest -v -s -m 'cpu_test' tool_use

##### models test #####

@@ -576,13 +603,19 @@ steps:
- vllm/
- tests/models/test_transformers.py
- tests/models/test_registry.py
commands:
- pytest -v -s models/test_transformers.py models/test_registry.py

- label: Basic Models Test (Other CPU) # 5min
timeout_in_minutes: 10
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/models/test_utils.py
- tests/models/test_vision.py
no_gpu: true
commands:
- pytest -v -s models/test_transformers.py \
models/test_registry.py \
models/test_utils.py \
models/test_vision.py
- pytest -v -s models/test_utils.py models/test_vision.py

- label: Language Models Tests (Standard)
timeout_in_minutes: 25
1 change: 0 additions & 1 deletion .github/mergify.yml
@@ -239,7 +239,6 @@ pull_request_rules:
conditions:
- or:
- files~=^tests/tool_use/
- files~=^tests/mistral_tool_use/
- files~=^tests/entrypoints/openai/tool_parsers/
- files=tests/entrypoints/openai/test_chat_with_tool_reasoning.py
- files~=^vllm/entrypoints/openai/tool_parsers/
17 changes: 9 additions & 8 deletions docker/Dockerfile.cpu
@@ -47,7 +47,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENV UV_HTTP_TIMEOUT=500

# Install Python dependencies
# Install Python dependencies
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
@@ -104,7 +104,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel

######################### TEST DEPS #########################
FROM base AS vllm-test-deps
@@ -117,7 +117,7 @@ RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -r requirements/cpu-test.txt
uv pip install -r requirements/cpu-test.txt

######################### DEV IMAGE #########################
FROM vllm-build AS vllm-dev
@@ -130,12 +130,12 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e tests/vllm_test_utils
uv pip install -e tests/vllm_test_utils

RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py develop
VLLM_TARGET_DEVICE=cpu python3 setup.py develop

COPY --from=vllm-test-deps /workspace/vllm/requirements/cpu-test.txt requirements/test.txt

@@ -160,11 +160,12 @@ ADD ./benchmarks/ ./benchmarks/
ADD ./vllm/collect_env.py .
ADD ./.buildkite/ ./.buildkite/

# Create symlink for vllm-workspace to maintain CI compatibility
RUN ln -sf /workspace /vllm-workspace

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e tests/vllm_test_utils

ENTRYPOINT ["bash"]
uv pip install -e tests/vllm_test_utils

######################### RELEASE IMAGE #########################
FROM base AS vllm-openai
1 change: 1 addition & 0 deletions pyproject.toml
@@ -126,6 +126,7 @@ markers = [
"core_model: enable this model test in each PR instead of only nightly",
"hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)",
"cpu_model: enable this model test in CPU tests",
"cpu_test: mark test as CPU-only test",
"split: run this test as part of a split",
"distributed: run this test only in distributed GPU tests",
"skip_v1: do not run this test with v1",
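For context, the `cpu_test` marker registered above is applied at module level in the test files further down and then selected or excluded with pytest's `-m` expression in the pipeline commands (`pytest -m 'cpu_test'` in the no-GPU lanes, `pytest -m 'not cpu_test'` in the GPU lanes). A minimal sketch of the pattern, using a hypothetical test module that is not part of this PR:

import pytest

# Tag every test in this module as CPU-only; the marker must be registered in
# pyproject.toml (as done above) or pytest will warn about an unknown mark.
pytestmark = pytest.mark.cpu_test


def test_runs_without_gpu():
    # Plain-Python logic only; no CUDA device is required.
    assert sum([1, 2, 3]) == 6

# CPU-only CI lanes select these tests with:  pytest -m 'cpu_test' <path>
# GPU lanes exclude them with:                pytest -m 'not cpu_test' <path>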
3 changes: 3 additions & 0 deletions tests/models/test_utils.py
@@ -1,10 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest
import torch

from vllm.model_executor.models.utils import AutoWeightsLoader

pytestmark = pytest.mark.cpu_test


class ModuleWithBatchNorm(torch.nn.Module):

2 changes: 2 additions & 0 deletions tests/models/test_vision.py
@@ -16,6 +16,8 @@
from vllm.platforms import current_platform
from vllm.utils import get_open_port, update_environment_variables

pytestmark = pytest.mark.cpu_test


@pytest.mark.parametrize(
("feature_sample_layers", "num_layers_loaded", "max_possible_layers",
2 changes: 2 additions & 0 deletions tests/multimodal/test_cache.py
@@ -19,6 +19,8 @@
MultiModalSharedField)
from vllm.multimodal.processing import PromptInsertion

pytestmark = pytest.mark.cpu_test


def _dummy_elem(
modality: str,
2 changes: 2 additions & 0 deletions tests/multimodal/test_hasher.py
@@ -10,6 +10,8 @@

from vllm.multimodal.hasher import MultiModalHasher

pytestmark = pytest.mark.cpu_test

ASSETS_DIR = Path(__file__).parent / "assets"
assert ASSETS_DIR.exists()

2 changes: 2 additions & 0 deletions tests/multimodal/test_image.py
@@ -8,6 +8,8 @@

from vllm.multimodal.image import ImageMediaIO, convert_image_mode

pytestmark = pytest.mark.cpu_test

ASSETS_DIR = Path(__file__).parent / "assets"
assert ASSETS_DIR.exists()

3 changes: 3 additions & 0 deletions tests/multimodal/test_inputs.py
@@ -1,10 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest
import torch

from vllm.multimodal.inputs import MultiModalKwargs, NestedTensors

pytestmark = pytest.mark.cpu_test


def assert_nested_tensors_equal(expected: NestedTensors,
actual: NestedTensors):
2 changes: 2 additions & 0 deletions tests/multimodal/test_processing.py
@@ -25,6 +25,8 @@

from .utils import random_image

pytestmark = pytest.mark.cpu_test


# yapf: disable
@pytest.mark.parametrize(
2 changes: 2 additions & 0 deletions tests/multimodal/test_registry.py
@@ -11,6 +11,8 @@

from ..models.utils import build_model_context

pytestmark = pytest.mark.cpu_test


@pytest.mark.parametrize(
"model_id,limit_mm_per_prompt,expected",
2 changes: 2 additions & 0 deletions tests/multimodal/test_video.py
@@ -17,6 +17,8 @@

from .utils import cosine_similarity, create_video_from_image, normalize_image

pytestmark = pytest.mark.cpu_test

NUM_FRAMES = 10
FAKE_OUTPUT_1 = np.random.rand(NUM_FRAMES, 1280, 720, 3)
FAKE_OUTPUT_2 = np.random.rand(NUM_FRAMES, 1280, 720, 3)
2 changes: 2 additions & 0 deletions tests/test_inputs.py
@@ -6,6 +6,8 @@
from vllm.inputs import zip_enc_dec_prompts
from vllm.inputs.parse import parse_and_batch_prompt

pytestmark = pytest.mark.cpu_test

STRING_INPUTS = [
'',
'foo',
4 changes: 4 additions & 0 deletions tests/test_outputs.py
@@ -1,8 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest

from vllm.outputs import RequestOutput

pytestmark = pytest.mark.cpu_test


def test_request_output_forward_compatible():
output = RequestOutput(request_id="test_request_id",
File renamed without changes.
@@ -12,7 +12,7 @@


# for each server config, download the model and return the config
@pytest.fixture(scope="session", params=CONFIGS.keys())
@pytest.fixture(scope="package", params=CONFIGS.keys())
def server_config(request):
config = CONFIGS[request.param]

@@ -26,7 +26,7 @@ def server_config(request):


# run this for each server config
@pytest.fixture(scope="session")
@pytest.fixture(scope="package")
def server(request, server_config: ServerConfig):
model = server_config["model"]
args_for_model = server_config["arguments"]
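The conftest change above narrows the `server_config` and `server` fixtures from session scope to package scope, so each test package gets its own setup and teardown instead of sharing one instance across the whole pytest session (presumably to keep the split CPU and GPU tool-use runs independent). A minimal sketch of package-scoped fixture behaviour, with illustrative names that are not from this PR:

import pytest

@pytest.fixture(scope="package")
def server():
    # Created once per test package and torn down when that package's tests
    # finish, rather than living for the entire pytest session.
    handle = {"started": True}  # stand-in for launching a real server process
    yield handle
    handle["started"] = False   # per-package teardown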
File renamed without changes.
2 changes: 2 additions & 0 deletions tests/tool_use/test_glm4_moe_tool_parser.py
@@ -10,6 +10,8 @@
from vllm.entrypoints.openai.tool_parsers import Glm4MoeModelToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer

pytestmark = pytest.mark.cpu_test

pytest.skip("skip glm4_moe parser test", allow_module_level=True)
# Use a common model that is likely to be available
MODEL = "zai-org/GLM-4.5"
2 changes: 2 additions & 0 deletions tests/tool_use/test_jamba_tool_parser.py
@@ -15,6 +15,8 @@
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer

pytestmark = pytest.mark.cpu_test

MODEL = "ai21labs/Jamba-tiny-dev"


2 changes: 2 additions & 0 deletions tests/tool_use/test_kimi_k2_tool_parser.py
@@ -10,6 +10,8 @@
from vllm.entrypoints.openai.tool_parsers import KimiK2ToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer

pytestmark = pytest.mark.cpu_test

# Use a common model that is likely to be available
MODEL = "moonshotai/Kimi-K2-Instruct"

2 changes: 2 additions & 0 deletions tests/tool_use/test_minimax_tool_parser.py
@@ -12,6 +12,8 @@
from vllm.entrypoints.openai.tool_parsers import MinimaxToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer

pytestmark = pytest.mark.cpu_test

# Use a common model that is likely to be available
MODEL = "MiniMaxAi/MiniMax-M1-40k"

2 changes: 2 additions & 0 deletions tests/tool_use/test_qwen3coder_tool_parser.py
@@ -18,6 +18,8 @@
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer

pytestmark = pytest.mark.cpu_test

MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"


2 changes: 2 additions & 0 deletions tests/tool_use/test_seed_oss_tool_parser.py
@@ -16,6 +16,8 @@
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer

pytestmark = pytest.mark.cpu_test

# Use a common model that is likely to be available
MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct"

2 changes: 2 additions & 0 deletions tests/tool_use/test_tool_choice_required.py
@@ -12,6 +12,8 @@
ChatCompletionToolsParam)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat

pytestmark = pytest.mark.cpu_test

EXAMPLE_TOOLS = [
{
"type": "function",
2 changes: 2 additions & 0 deletions tests/tool_use/test_xlam_tool_parser.py
@@ -14,6 +14,8 @@
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer

pytestmark = pytest.mark.cpu_test

# Use a common model that is likely to be available
MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"
