vllm-project · DarkLight1337 · Nov 7, 2025 · Nov 4, 2025
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -543,8 +543,11 @@ steps:
 
 - label: Model Executor Test # 23min
   timeout_in_minutes: 35
+  torch_nightly: true
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
+  - vllm/engine/arg_utils.py
+  - vllm/config/model.py
   - vllm/model_executor
   - tests/model_executor
   - tests/entrypoints/openai/test_tensorizer_entrypoint.py

diff --git a/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py b/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py
@@ -1,12 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
+
 from vllm import SamplingParams
 from vllm.config.load import LoadConfig
 from vllm.model_executor.model_loader import get_model_loader
 
+# Load format passed as load_format= to vllm_runner in the tests below.
 load_format = "runai_streamer"
+# Model identifier loaded by test_runai_model_loader_download_files.
 test_model = "openai-community/gpt2"
+# gs:// model location loaded by test_runai_model_loader_download_files_gcs.
+# TODO(amacaskill): Replace with a GKE owned GCS bucket.
+test_gcs_model = "gs://vertex-model-garden-public-us/codegemma/codegemma-2b/"
 
 prompts = [
     "Hello, my name is",
@@ -32,3 +36,16 @@ def test_runai_model_loader_download_files(vllm_runner):
     with vllm_runner(test_model, load_format=load_format) as llm:
         deserialized_outputs = llm.generate(prompts, sampling_params)
         assert deserialized_outputs
+
+
+def test_runai_model_loader_download_files_gcs(
+    vllm_runner, monkeypatch: pytest.MonkeyPatch
+):
+    # Same flow as test_runai_model_loader_download_files, but the model is
+    # addressed by the gs:// URI in test_gcs_model instead of test_model.
+    # Environment variables are set through the monkeypatch fixture so the
+    # changes are undone when the test finishes.
+    monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "fake-project")
+    # NOTE(review): presumably lets the streamer read the public bucket
+    # without real GCP credentials -- confirm against the streamer's docs.
+    monkeypatch.setenv("RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS", "true")
+    # NOTE(review): points the "emulator" endpoint at the real GCS host;
+    # looks intentional (forces an explicit endpoint) -- confirm.
+    monkeypatch.setenv(
+        "CLOUD_STORAGE_EMULATOR_ENDPOINT", "https://storage.googleapis.com"
+    )
+    with vllm_runner(test_gcs_model, load_format=load_format) as llm:
+        deserialized_outputs = llm.generate(prompts, sampling_params)
+        # Only checks that generation returned a truthy (non-empty) result.
+        assert deserialized_outputs