From d89abd9e7679e452e0c5e225ffc012f164b83620 Mon Sep 17 00:00:00 2001 From: KuntaiDu Date: Mon, 3 Nov 2025 16:06:52 -0800 Subject: [PATCH] revert test changes Signed-off-by: KuntaiDu --- tests/v1/core/test_scheduler.py | 7 ------- tests/v1/core/utils.py | 2 -- .../v1/kv_connector/nixl_integration/run_accuracy_test.sh | 2 -- .../v1/kv_connector/nixl_integration/run_edge_case_test.sh | 2 -- tests/v1/kv_connector/unit/test_multi_connector.py | 1 - tests/v1/kv_connector/unit/test_nixl_connector.py | 1 - .../v1/kv_connector/unit/test_shared_storage_connector.py | 1 - tests/v1/kv_connector/unit/utils.py | 3 --- 8 files changed, 19 deletions(-) diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py index 92e3831b9c7a..749cf7dc8397 100644 --- a/tests/v1/core/test_scheduler.py +++ b/tests/v1/core/test_scheduler.py @@ -891,7 +891,6 @@ def test_kv_connector_basic(): scheduler = create_scheduler( enable_prefix_caching=True, use_kv_connector=True, - disable_hybrid_kv_cache_manager=True, ) NUM_TOTAL_BLOCKS = scheduler.kv_cache_manager.block_pool.get_num_free_blocks() BLOCK_SIZE = scheduler.cache_config.block_size @@ -1017,7 +1016,6 @@ def test_external_prefix_cache_metrics(): scheduler = create_scheduler( enable_prefix_caching=False, use_kv_connector=True, - disable_hybrid_kv_cache_manager=True, ) # Mock connector to simulate a partial external cache hit @@ -1082,7 +1080,6 @@ def test_kv_connector_unable_to_allocate(): use_kv_connector=True, block_size=BLOCK_SIZE, num_blocks=NUM_BLOCKS, - disable_hybrid_kv_cache_manager=True, ) NUM_MATCHED_NEW_TOKENS = BLOCK_SIZE * 2 scheduler.connector.get_num_new_matched_tokens = Mock(name="method") @@ -1166,7 +1163,6 @@ def test_kv_connector_handles_preemption(): use_kv_connector=True, block_size=BLOCK_SIZE, num_blocks=NUM_BLOCKS, - disable_hybrid_kv_cache_manager=True, ) NUM_MATCHED_NEW_TOKENS = BLOCK_SIZE @@ -1383,7 +1379,6 @@ def create_scheduler_with_priority( block_size: int = 16, max_model_len: int | None = None, num_speculative_tokens: int | None = None, - disable_hybrid_kv_cache_manager: bool = False, ) -> Scheduler: """Create scheduler with priority policy enabled. @@ -1408,7 +1403,6 @@ def create_scheduler_with_priority( disable_chunked_mm_input=disable_chunked_mm_input, enable_chunked_prefill=True, policy="priority", # Enable priority scheduling - disable_hybrid_kv_cache_manager=disable_hybrid_kv_cache_manager, ) model_config = ModelConfig( model=model, @@ -2015,7 +2009,6 @@ def test_priority_scheduling_preemption_and_resumption_when_out_of_kv(): num_blocks=5, # Can hold 64 tokens (first block is null) block_size=16, # Standard block size use_kv_connector=True, - disable_hybrid_kv_cache_manager=True, ) # Create a request and schedule it diff --git a/tests/v1/core/utils.py b/tests/v1/core/utils.py index 3f5e1b9eeaf7..6e739d6b0e77 100644 --- a/tests/v1/core/utils.py +++ b/tests/v1/core/utils.py @@ -46,7 +46,6 @@ def create_scheduler( num_speculative_tokens: int | None = None, skip_tokenizer_init: bool = False, async_scheduling: bool = False, - disable_hybrid_kv_cache_manager: bool = False, ) -> Scheduler | AsyncScheduler: """Create scheduler under test. @@ -71,7 +70,6 @@ def create_scheduler( disable_chunked_mm_input=disable_chunked_mm_input, enable_chunked_prefill=True, async_scheduling=async_scheduling, - disable_hybrid_kv_cache_manager=disable_hybrid_kv_cache_manager, ) model_config = ModelConfig( model=model, diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh index a756858e2cc5..a9817313cf02 100755 --- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh @@ -136,7 +136,6 @@ run_tests_for_model() { vllm serve $model_name \ --port $PORT \ --enforce-eager \ - --disable-hybrid-kv-cache-manager \ --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \ --tensor-parallel-size $PREFILLER_TP_SIZE \ --kv-transfer-config '$KV_CONFIG'" @@ -179,7 +178,6 @@ run_tests_for_model() { --port $PORT \ --enforce-eager \ --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \ - --disable-hybrid-kv-cache-manager \ --kv-transfer-config '$KV_CONFIG'" # DP-EP attention mode diff --git a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh index a3eeedb2e514..c48b452e24cd 100755 --- a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh @@ -85,7 +85,6 @@ run_tests_for_model() { --port $PREFILL_PORT \ --enforce-eager \ --gpu-memory-utilization 0.2 \ - --disable-hybrid-kv-cache-manager \ --kv-transfer-config '$KV_CONFIG'" if [ -n "$model_args" ]; then @@ -104,7 +103,6 @@ run_tests_for_model() { --port $DECODE_PORT \ --enforce-eager \ --gpu-memory-utilization 0.2 \ - --disable-hybrid-kv-cache-manager \ --kv-transfer-config '$KV_CONFIG'" if [ -n "$model_args" ]; then diff --git a/tests/v1/kv_connector/unit/test_multi_connector.py b/tests/v1/kv_connector/unit/test_multi_connector.py index 6748532afd97..1c1ac915c758 100644 --- a/tests/v1/kv_connector/unit/test_multi_connector.py +++ b/tests/v1/kv_connector/unit/test_multi_connector.py @@ -114,7 +114,6 @@ def test_multi_shared_storage_connector_consistency(): enforce_eager=True, gpu_memory_utilization=0.5, kv_transfer_config=kv_transfer_config, - disable_hybrid_kv_cache_manager=True, ) # Run generation - this should trigger saving KV cache _ = llm.generate(PROMPTS, SAMPLING_PARAMS) diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index 1f3fdafc644d..475cf2285e39 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -1020,7 +1020,6 @@ def test_abort_timeout_on_prefiller(monkeypatch, distributed_executor_backend): "gpu_memory_utilization": 0.5, "kv_transfer_config": kv_transfer_config, "distributed_executor_backend": distributed_executor_backend, - "disable_hybrid_kv_cache_manager": True, } timeout = 6 diff --git a/tests/v1/kv_connector/unit/test_shared_storage_connector.py b/tests/v1/kv_connector/unit/test_shared_storage_connector.py index 6040ed5a6806..e7013a794a8c 100644 --- a/tests/v1/kv_connector/unit/test_shared_storage_connector.py +++ b/tests/v1/kv_connector/unit/test_shared_storage_connector.py @@ -132,7 +132,6 @@ def test_shared_storage_connector_hashes(tmp_path): enforce_eager=True, kv_transfer_config=kv_transfer_config, limit_mm_per_prompt={"image": 2}, - disable_hybrid_kv_cache_manager=True, ) # don't put this import at the top level diff --git a/tests/v1/kv_connector/unit/utils.py b/tests/v1/kv_connector/unit/utils.py index 46ea46e53084..e3f30bd7698f 100644 --- a/tests/v1/kv_connector/unit/utils.py +++ b/tests/v1/kv_connector/unit/utils.py @@ -91,9 +91,6 @@ def create_vllm_config( max_num_batched_tokens=max_num_batched_tokens, max_model_len=max_model_len, enable_chunked_prefill=enable_chunked_prefill, - # Disable hybrid KV cache manager for testing - # Should be removed after we support hybrid KV cache manager-based testing. - disable_hybrid_kv_cache_manager=True, ) model_config = ModelConfig( model=model,