3838 - pip install -r requirements-docs.txt
3939 - SPHINXOPTS=\"-W\" make html
4040 # Check API reference (if it fails, you may have missing mock imports)
41- - grep \"sig sig-object py\" build/html/dev/sampling_params.html
41+ - grep \"sig sig-object py\" build/html/api/inference_params.html
4242
4343- label : Async Engine, Inputs, Utils, Worker Test # 24min
4444 fast_check : true
5252 - tests/worker
5353 - tests/standalone_tests/lazy_torch_compile.py
5454 commands :
55+ - pip install git+https://github.com/Isotr0py/DeepSeek-VL2.git # Used by multimodal processing test
5556 - python3 standalone_tests/lazy_torch_compile.py
5657 - pytest -v -s mq_llm_engine # MQLLMEngine
5758 - pytest -v -s async_engine # AsyncLLMEngine
@@ -187,19 +188,19 @@ steps:
187188 - examples/
188189 commands :
189190 - pip install tensorizer # for tensorizer test
190- - python3 offline_inference.py
191- - python3 cpu_offload.py
192- - python3 offline_inference_chat.py
193- - python3 offline_inference_with_prefix.py
194- - python3 llm_engine_example.py
195- - python3 offline_inference_vision_language.py
196- - python3 offline_inference_vision_language_multi_image.py
197- - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
198- - python3 offline_inference_encoder_decoder.py
199- - python3 offline_inference_classification.py
200- - python3 offline_inference_embedding.py
201- - python3 offline_inference_scoring.py
202- - python3 offline_profile.py --model facebook/opt-125m run_num_steps --num-steps 2
191+ - python3 offline_inference/basic.py
192+ - python3 offline_inference/cpu_offload.py
193+ - python3 offline_inference/chat.py
194+ - python3 offline_inference/prefix_caching.py
195+ - python3 offline_inference/llm_engine_example.py
196+ - python3 offline_inference/vision_language.py
197+ - python3 offline_inference/vision_language_multi_image.py
198+ - python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
199+ - python3 offline_inference/encoder_decoder.py
200+ - python3 offline_inference/classification.py
201+ - python3 offline_inference/embedding.py
202+ - python3 offline_inference/scoring.py
203+ - python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
203204
204205- label : Prefix Caching Test # 9min
205206 mirror_hardwares : [amd]
@@ -214,6 +215,7 @@ steps:
214215 - vllm/model_executor/layers
215216 - vllm/sampling_metadata.py
216217 - tests/samplers
218+ - tests/conftest.py
217219 commands :
218220 - pytest -v -s samplers
219221 - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
@@ -229,20 +231,22 @@ steps:
229231 - pytest -v -s test_logits_processor.py
230232 - pytest -v -s model_executor/test_guided_processors.py
231233
232- - label : Speculative decoding tests # 30min
234+ - label : Speculative decoding tests # 40min
233235 source_file_dependencies :
234236 - vllm/spec_decode
235237 - tests/spec_decode
238+ - vllm/model_executor/models/eagle.py
236239 commands :
237240 - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
238241 - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py
242+ - pytest -v -s spec_decode/e2e/test_eagle_correctness.py
239243
240244- label : LoRA Test %N # 15min each
241245 mirror_hardwares : [amd]
242246 source_file_dependencies :
243247 - vllm/lora
244248 - tests/lora
245- command : pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
249+ command : pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py
246250 parallelism : 4
247251
248252- label : " PyTorch Fullgraph Smoke Test" # 9min
@@ -367,6 +371,7 @@ steps:
367371 - tests/models/encoder_decoder/vision_language
368372 commands :
369373 - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
374+ - pytest -v -s models/multimodal
370375 - pytest -v -s models/decoder_only/audio_language -m 'core_model or quant_model'
371376 - pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'core_model or quant_model'
372377 - pytest -v -s models/embedding/vision_language -m core_model
@@ -535,6 +540,7 @@ steps:
535540 # requires multi-GPU testing for validation.
536541 - pytest -v -s -x lora/test_chatglm3_tp.py
537542 - pytest -v -s -x lora/test_llama_tp.py
543+ - pytest -v -s -x lora/test_minicpmv_tp.py
538544
539545
540546- label : Weight Loading Multiple GPU Test # 33min
0 commit comments