Diff summary: 2 files changed, +5 −3 lines (env-var definitions module and model_executor/model_loader).
@@ -360,8 +360,9 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
 360 360      # Enables weights compression during model export via HF Optimum
 361 361      # default is False
 362 362      "VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS":
 363     -    lambda: bool(os.getenv("VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS", False)),
 364     -
     363 +    lambda:
     364 +    (os.environ.get("VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS", "0").lower() in
     365 +     ("on", "true", "1")),
 365 366      # If the env var is set, then all workers will execute as separate
 366 367      # processes from the engine, and we use the same mechanism to trigger
 367 368      # execution on all workers.
@@ -125,7 +125,8 @@ def __init__(
 125 125          "as-is, all possible options that may affect model conversion "
 126 126          "are ignored.")
 127 127
 128     -        load_in_8bit = envs.VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
     128 +        load_in_8bit = (envs.VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
     129 +                        if export else False)
 129 130          pt_model = OVModelForCausalLM.from_pretrained(
 130 131              model_config.model,
 131 132              export=export,
You can’t perform that action at this time.
0 commit comments