diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index 4b9f4aef022d..60a8e90cdc1a 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -299,7 +299,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1" # Disable multi-stream for shared experts as no Stream on CPU - os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "0" + os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "1" # Intel OpenMP setting ld_preload_str = os.getenv("LD_PRELOAD", "")