|
20 | 20 | from vllm.executor.executor_base import ExecutorBase |
21 | 21 | from vllm.logger import init_logger |
22 | 22 | from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS |
| 23 | +from vllm.plugins import load_general_plugins |
23 | 24 | from vllm.transformers_utils.utils import check_gguf_file |
24 | 25 | from vllm.usage.usage_lib import UsageContext |
25 | 26 | from vllm.utils import FlexibleArgumentParser, StoreBoolean |
@@ -204,6 +205,8 @@ class EngineArgs: |
204 | 205 |
|
205 | 206 | calculate_kv_scales: Optional[bool] = None |
206 | 207 |
|
| 208 | + additional_config: Optional[Dict[str, Any]] = None |
| 209 | + |
207 | 210 | def __post_init__(self): |
208 | 211 | if not self.tokenizer: |
209 | 212 | self.tokenizer = self.model |
@@ -985,6 +988,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: |
985 | 988 | 'be loaded from the model checkpoint if available. ' |
986 | 989 | 'Otherwise, the scales will default to 1.0.') |
987 | 990 |
|
| 991 | + parser.add_argument( |
| 992 | + "--additional-config", |
| 993 | + type=json.loads, |
| 994 | + default=None, |
| 995 | + help="Additional config for specified platform in JSON format. " |
| 996 | + "Different platforms may support different configs. Make sure the " |
| 997 | + "configs are valid for the platform you are using. The input format" |
| 998 | + " is like '{\"config_key\":\"config_value\"}'") |
988 | 999 | return parser |
989 | 1000 |
|
990 | 1001 | @classmethod |
@@ -1046,6 +1057,9 @@ def create_load_config(self) -> LoadConfig: |
1046 | 1057 | def create_engine_config(self, |
1047 | 1058 | usage_context: Optional[UsageContext] = None |
1048 | 1059 | ) -> VllmConfig: |
| 1060 | + from vllm.platforms import current_platform |
| 1061 | + current_platform.pre_register_and_update() |
| 1062 | + |
1049 | 1063 | if envs.VLLM_USE_V1: |
1050 | 1064 | self._override_v1_engine_args(usage_context) |
1051 | 1065 |
|
@@ -1289,6 +1303,7 @@ def create_engine_config(self, |
1289 | 1303 | prompt_adapter_config=prompt_adapter_config, |
1290 | 1304 | compilation_config=self.compilation_config, |
1291 | 1305 | kv_transfer_config=self.kv_transfer_config, |
| 1306 | + additional_config=self.additional_config, |
1292 | 1307 | ) |
1293 | 1308 |
|
1294 | 1309 | if envs.VLLM_USE_V1: |
@@ -1349,6 +1364,12 @@ def add_cli_args(parser: FlexibleArgumentParser, |
1349 | 1364 | parser.add_argument('--disable-log-requests', |
1350 | 1365 | action='store_true', |
1351 | 1366 | help='Disable logging requests.') |
| 1367 | + # Initialize plugins to update the parser. For example, a plugin may |
| 1368 | + # add a new kind of quantization method to the --quantization argument or |
| 1369 | + # a new device to the --device argument. |
| 1370 | + load_general_plugins() |
| 1371 | + from vllm.platforms import current_platform |
| 1372 | + current_platform.pre_register_and_update(parser) |
1352 | 1373 | return parser |
1353 | 1374 |
|
1354 | 1375 |
|
|
0 commit comments