1 file changed: +13 -2 lines changed

```diff
@@ -1440,8 +1440,19 @@ def _override_v1_engine_args(self, usage_context: UsageContext) -> None:
         # When no user override, set the default values based on the usage
         # context.
         # Use different default values for different hardware.
-        from vllm.platforms import current_platform
-        device_name = current_platform.get_device_name().lower()
+
+        # Try to query the device name on the current platform. If it fails,
+        # it may be because the platform that imports vLLM is not the same
+        # as the platform that vLLM is running on (e.g. the case of scaling
+        # vLLM with Ray) and has no GPUs. In this case we use the default
+        # values for non-H100/H200 GPUs.
+        try:
+            from vllm.platforms import current_platform
+            device_name = current_platform.get_device_name().lower()
+        except Exception:
+            # This is only used to set default_max_num_batched_tokens
+            device_name = "no-device"
+
         if "h100" in device_name or "h200" in device_name:
             # For H100 and H200, we use larger default values.
             default_max_num_batched_tokens = {
```
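For readers who want to apply the same defensive pattern outside vLLM, the sketch below reproduces the idea in isolation: probe for an accelerator name, fall back to a sentinel when no device is visible (e.g. on a CPU-only driver node when scaling with Ray), and pick batching defaults from the resolved name. The helper `resolve_device_name` and the concrete token budgets are illustrative assumptions, not values or APIs taken from vLLM; only the try/except structure mirrors the change in the diff above.

```python
# Minimal sketch of the fallback pattern in the diff above.
# `resolve_device_name` and the token budgets are illustrative assumptions.
from typing import Dict


def resolve_device_name() -> str:
    """Return a lowercase device name, or a sentinel when no GPU is visible."""
    try:
        import torch
        if torch.cuda.is_available():
            return torch.cuda.get_device_name(0).lower()
    except Exception:
        # Importing or probing the platform can fail on machines that only
        # schedule work (e.g. a CPU-only Ray driver) and have no GPUs.
        pass
    # Sentinel: callers fall through to the non-H100/H200 defaults.
    return "no-device"


def default_max_num_batched_tokens(usage: str) -> int:
    """Pick a default token budget based on the resolved device name."""
    device_name = resolve_device_name()
    if "h100" in device_name or "h200" in device_name:
        # Hypothetical larger budgets for high-memory GPUs.
        defaults: Dict[str, int] = {"llm": 16384, "openai": 8192}
    else:
        defaults = {"llm": 8192, "openai": 2048}
    return defaults[usage]


if __name__ == "__main__":
    print(resolve_device_name(), default_max_num_batched_tokens("llm"))
```

The key design choice carried over from the patch is that a failed device query is treated as "no special hardware" rather than an error, since the result only tunes a performance default.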