Diff summary: 2 files changed, +5 −3 lines (env-var definitions module and model_executor/model_loader).
@@ -360,8 +360,9 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
 360 360      # Enables weights compression during model export via HF Optimum
 361 361      # default is False
 362 362      "VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS":
 363     -    lambda: bool(os.getenv("VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS", False)),
 364     -
     363 +    lambda:
     364 +    (os.environ.get("VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS", "0").lower() in
     365 +     ("on", "true", "1")),
 365 366      # If the env var is set, then all workers will execute as separate
 366 367      # processes from the engine, and we use the same mechanism to trigger
 367 368      # execution on all workers.
@@ -125,7 +125,8 @@ def __init__(
 125 125          "as-is, all possible options that may affect model conversion "
 126 126          "are ignored.")
 127 127
 128     -        load_in_8bit = envs.VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
     128 +        load_in_8bit = (envs.VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
     129 +                        if export else False)
 129 130          pt_model = OVModelForCausalLM.from_pretrained(
 130 131              model_config.model,
 131 132              export=export,
You can’t perform that action at this time.
0 commit comments