@@ -50,19 +50,28 @@ steps:
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
+  - tests/multimodal
+  - tests/utils_
+  commands:
+  - pytest -v -s -m 'not cpu_test' multimodal
+  - pytest -v -s utils_
+
+- label: Async Engine, Inputs, Utils, Worker Test (CPU) # 4 mins
+  timeout_in_minutes: 10
+  source_file_dependencies:
+  - vllm/
   - tests/test_inputs.py
   - tests/test_outputs.py
   - tests/multimodal
-  - tests/utils_
   - tests/standalone_tests/lazy_imports.py
   - tests/transformers_utils
+  no_gpu: true
   commands:
   - python3 standalone_tests/lazy_imports.py
   - pytest -v -s test_inputs.py
   - pytest -v -s test_outputs.py
-  - pytest -v -s multimodal
-  - pytest -v -s utils_ # Utils
-  - pytest -v -s transformers_utils # transformers_utils
+  - pytest -v -s -m 'cpu_test' multimodal
+  - pytest -v -s transformers_utils

 - label: Python-only Installation Test # 10min
   timeout_in_minutes: 20
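This hunk splits the step into a GPU half (`-m 'not cpu_test'`) and a new `no_gpu: true` half (`-m 'cpu_test'`), so CPU-only tests stop occupying GPU runners. As a minimal sketch of the pytest-marker mechanism these filters rely on (the registration shown is hypothetical; vLLM's actual conftest or pyproject may declare it differently):

```python
# conftest.py -- hypothetical registration of the `cpu_test` marker that
# the `-m 'cpu_test'` / `-m 'not cpu_test'` filters select on.
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "cpu_test: mark a test as runnable without a GPU"
    )

# test_example.py -- a test opting into the CPU-only lane.
import pytest

@pytest.mark.cpu_test
def test_parsing_runs_on_cpu():
    assert len("vllm") == 4
```

`pytest -m 'cpu_test' <dir>` then collects only the marked tests and `-m 'not cpu_test'` collects the complement, so the two steps together still cover the whole directory.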
@@ -159,10 +168,7 @@ steps:
   - examples/offline_inference/rlhf.py
   - examples/offline_inference/rlhf_colocate.py
   - tests/examples/offline_inference/data_parallel.py
-  - tests/v1/test_async_llm_dp.py
-  - tests/v1/test_external_lb_dp.py
-  - tests/v1/test_internal_lb_dp.py
-  - tests/v1/test_hybrid_lb_dp.py
+  - tests/v1/distributed
   - tests/v1/engine/test_engine_core_client.py
   - tests/distributed/test_symm_mem_allreduce.py
   commands:
@@ -180,10 +186,10 @@ steps:
   - TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
   # test with internal dp
   - python3 ../examples/offline_inference/data_parallel.py --enforce-eager
-  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
-  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/test_external_lb_dp.py
-  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/test_internal_lb_dp.py
-  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/test_hybrid_lb_dp.py
+  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
+  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
+  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_internal_lb_dp.py
+  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_hybrid_lb_dp.py
   - pytest -v -s v1/engine/test_engine_core_client.py::test_kv_cache_events_dp
   - pytest -v -s distributed/test_utils.py
   - pytest -v -s compile/test_basic_correctness.py
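The relocated tests keep their `TP_SIZE=` / `DP_SIZE=` prefixes, configuring the parallel layout through the environment rather than pytest arguments. A rough sketch of that pattern (the names, defaults, and assertion are assumptions for illustration, not vLLM's actual test code):

```python
# Hypothetical sketch: a distributed test reading its parallel layout from
# the environment, as the `TP_SIZE=.. DP_SIZE=.. pytest ..` invocations imply.
import os

TP_SIZE = int(os.environ.get("TP_SIZE", "1"))  # tensor-parallel degree
DP_SIZE = int(os.environ.get("DP_SIZE", "1"))  # data-parallel degree

def test_layout_fits_runner():
    # e.g. TP_SIZE=2 DP_SIZE=2 occupies all four GPUs on a 4-GPU runner
    assert TP_SIZE * DP_SIZE <= 4
```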
@@ -290,24 +296,34 @@ steps:
   - tests/v1
   commands:
   # split the test to avoid interference
-  - pytest -v -s v1/core
   - pytest -v -s v1/executor
   - pytest -v -s v1/kv_offload
   - pytest -v -s v1/sample
   - pytest -v -s v1/logits_processors
   - pytest -v -s v1/worker
-  - pytest -v -s v1/structured_output
   - pytest -v -s v1/spec_decode
-  - pytest -v -s v1/kv_connector/unit
-  - pytest -v -s v1/metrics
-  - pytest -v -s v1/test_serial_utils.py
-  - pytest -v -s v1/test_utils.py
+  - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
+  - pytest -v -s -m 'not cpu_test' v1/metrics
   - pytest -v -s v1/test_oracle.py
-  - pytest -v -s v1/test_metrics_reader.py
+  - pytest -v -s v1/test_request.py
   # Integration test for streaming correctness (requires special branch).
   - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
   - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine

+- label: V1 Test others (CPU) # 5 mins
+  source_file_dependencies:
+  - vllm/
+  - tests/v1
+  no_gpu: true
+  commands:
+  # split the test to avoid interference
+  - pytest -v -s v1/core
+  - pytest -v -s v1/structured_output
+  - pytest -v -s v1/test_serial_utils.py
+  - pytest -v -s -m 'cpu_test' v1/kv_connector/unit
+  - pytest -v -s -m 'cpu_test' v1/metrics
+
+
 - label: Examples Test # 30min
   timeout_in_minutes: 45
   mirror_hardwares: [amdexperimental]
@@ -463,29 +479,18 @@ steps:
   commands:
   - pytest -v -s kernels/mamba

-- label: Tensorizer Test # 14min
-  timeout_in_minutes: 25
-  mirror_hardwares: [amdexperimental]
-  source_file_dependencies:
-  - vllm/model_executor/model_loader
-  - tests/tensorizer_loader
-  - tests/entrypoints/openai/test_tensorizer_entrypoint.py
-  commands:
-  - apt-get update && apt-get install -y curl libsodium23
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s tensorizer_loader
-  - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
-
-- label: Model Executor Test # 7min
-  timeout_in_minutes: 20
+- label: Model Executor Test # 23min
+  timeout_in_minutes: 35
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/model_executor
   - tests/model_executor
+  - tests/entrypoints/openai/test_tensorizer_entrypoint.py
   commands:
   - apt-get update && apt-get install -y curl libsodium23
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -v -s model_executor
+  - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py

 - label: Benchmarks # 11min
   timeout_in_minutes: 20
@@ -520,7 +525,7 @@ steps:
   # https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
   # we can only upgrade after this is resolved
   - pip install --pre torchao==0.13.0.dev20250814 --index-url https://download.pytorch.org/whl/nightly/cu128
-  - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
+  - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/

 - label: LM Eval Small Models # 53min
   timeout_in_minutes: 75
@@ -548,10 +553,17 @@ steps:
   source_file_dependencies:
   - vllm/
   - tests/tool_use
-  - tests/mistral_tool_use
   commands:
-  - pytest -v -s tool_use
-  - pytest -v -s mistral_tool_use
+  - pytest -v -s -m 'not cpu_test' tool_use
+
+- label: OpenAI-Compatible Tool Use (CPU) # 5 mins
+  timeout_in_minutes: 10
+  source_file_dependencies:
+  - vllm/
+  - tests/tool_use
+  no_gpu: true
+  commands:
+  - pytest -v -s -m 'cpu_test' tool_use

 ##### models test #####

@@ -591,13 +603,19 @@ steps:
   - vllm/
   - tests/models/test_transformers.py
   - tests/models/test_registry.py
+  commands:
+  - pytest -v -s models/test_transformers.py models/test_registry.py
+
+- label: Basic Models Test (Other CPU) # 5min
+  timeout_in_minutes: 10
+  torch_nightly: true
+  source_file_dependencies:
+  - vllm/
   - tests/models/test_utils.py
   - tests/models/test_vision.py
+  no_gpu: true
   commands:
-  - pytest -v -s models/test_transformers.py \
-    models/test_registry.py \
-    models/test_utils.py \
-    models/test_vision.py
+  - pytest -v -s models/test_utils.py models/test_vision.py

 - label: Language Models Tests (Standard)
   timeout_in_minutes: 25
@@ -767,11 +785,13 @@ steps:
   commands:
   - pip install --upgrade git+https://github.com/huggingface/transformers
   - pytest -v -s tests/models/test_initialization.py
+  - pytest -v -s tests/models/test_transformers.py
   - pytest -v -s tests/models/multimodal/processing/
   - pytest -v -s tests/models/multimodal/test_mapping.py
   - python3 examples/offline_inference/basic/chat.py
-  - python3 examples/offline_inference/audio_language.py --model-type whisper
   - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
+  # Whisper needs spawn method to avoid deadlock
+  - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper

 - label: Blackwell Test # 38 min
   timeout_in_minutes: 60
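The Whisper example now runs with `VLLM_WORKER_MULTIPROC_METHOD=spawn`; per the new comment, forking workers after CUDA is initialized can deadlock. A minimal sketch of the idea (the helper below is illustrative, not vLLM's implementation):

```python
# Hypothetical sketch: selecting the multiprocessing start method from the
# environment. `fork` after CUDA initialization can deadlock, which is why
# the Whisper command exports VLLM_WORKER_MULTIPROC_METHOD=spawn.
import multiprocessing as mp
import os

def get_worker_context() -> mp.context.BaseContext:
    method = os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", "fork")
    # "spawn" starts children from a fresh interpreter instead of a fork,
    # so no CUDA state is inherited mid-initialization.
    return mp.get_context(method)
```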
@@ -827,6 +847,23 @@ steps:
   - uv pip install --system 'gpt-oss[eval]==0.0.5'
   - pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58 --server-args '--tensor-parallel-size 2'

+- label: Blackwell Quantized MoE Test
+  timeout_in_minutes: 60
+  working_dir: "/vllm-workspace/"
+  gpu: b200
+  source_file_dependencies:
+  - tests/quantization/test_blackwell_moe.py
+  - vllm/model_executor/models/deepseek_v2.py
+  - vllm/model_executor/models/gpt_oss.py
+  - vllm/model_executor/models/llama4.py
+  - vllm/model_executor/layers/fused_moe
+  - vllm/model_executor/layers/quantization/compressed_tensors
+  - vllm/model_executor/layers/quantization/modelopt.py
+  - vllm/model_executor/layers/quantization/mxfp4.py
+  - vllm/v1/attention/backends/flashinfer.py
+  commands:
+  - pytest -s -v tests/quantization/test_blackwell_moe.py
+
 ##### 1 GPU test #####
 ##### multi gpus test #####

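The new Blackwell step, like the others, lists `source_file_dependencies` so it runs only when relevant files change. A toy sketch of that gating, under the assumption that it is prefix matching of changed paths against the listed entries (the real pipeline generator may behave differently):

```python
# Hypothetical sketch of source_file_dependencies gating: run a step only
# when some changed file falls under one of its listed path prefixes.
def step_should_run(changed_files: list[str], deps: list[str]) -> bool:
    return any(f.startswith(dep) for f in changed_files for dep in deps)

# e.g. a change to the FlashInfer backend would trigger the Blackwell MoE step
assert step_should_run(
    ["vllm/v1/attention/backends/flashinfer.py"],
    ["tests/quantization/test_blackwell_moe.py",
     "vllm/v1/attention/backends/flashinfer.py"],
)
```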
@@ -869,48 +906,58 @@ steps:
   - NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
   - python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=1 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code

-- label: Distributed Tests (2 GPUs) # 110min
-  timeout_in_minutes: 150
+- label: Distributed Tests (2 GPUs) # 68min
+  timeout_in_minutes: 90
   mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 2
   source_file_dependencies:
+  - vllm/compilation/
   - vllm/distributed/
   - vllm/engine/
   - vllm/executor/
-  - vllm/model_executor/models/
-  - tests/distributed/
-  - vllm/compilation
   - vllm/worker/worker_base.py
-  - entrypoints/llm/test_collective_rpc.py
-  - tests/v1/test_async_llm_dp.py
-  - tests/v1/test_external_lb_dp.py
-  - tests/v1/entrypoints/openai/test_multi_api_servers.py
   - vllm/v1/engine/
   - vllm/v1/worker/
+  - tests/compile/test_basic_correctness.py
+  - tests/compile/test_wrapper.py
+  - tests/distributed/
+  - tests/entrypoints/llm/test_collective_rpc.py
+  - tests/v1/distributed
+  - tests/v1/entrypoints/openai/test_multi_api_servers.py
+  - tests/v1/shutdown
   - tests/v1/worker/test_worker_memory_snapshot.py
   commands:
-  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
-  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_external_lb_dp.py
+  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
+  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
   - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
   - pytest -v -s entrypoints/llm/test_collective_rpc.py
   - pytest -v -s ./compile/test_basic_correctness.py
   - pytest -v -s ./compile/test_wrapper.py
   - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
+  - pytest -v -s distributed/test_sequence_parallel.py
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
+  - pytest -v -s v1/worker/test_worker_memory_snapshot.py
+
+- label: Distributed Model Tests (2 GPUs) # 37min
+  timeout_in_minutes: 50
+  mirror_hardwares: [amdexperimental]
+  working_dir: "/vllm-workspace/tests"
+  num_gpus: 2
+  source_file_dependencies:
+  - vllm/model_executor/model_loader/sharded_state_loader.py
+  - vllm/model_executor/models/
+  - tests/basic_correctness/
+  - tests/model_executor/model_loader/test_sharded_state_loader.py
+  - tests/models/
+  commands:
   - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py
   # Avoid importing model tests that cause CUDA reinitialization error
   - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
   - pytest models/language -v -s -m 'distributed(num_gpus=2)'
   - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
   - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
-  # test sequence parallel
-  - pytest -v -s distributed/test_sequence_parallel.py
-  # this test fails consistently.
-  # TODO: investigate and fix
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
-  - pytest -v -s models/multimodal/generation/test_maverick.py
-  - pytest -v -s v1/worker/test_worker_memory_snapshot.py

 - label: Plugin Tests (2 GPUs) # 40min
   timeout_in_minutes: 60