@@ -298,6 +298,27 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
    print_options(specific_options);
}

+static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & value) {
+    std::vector<ggml_backend_dev_t> devices;
+    auto dev_names = string_split<std::string>(value, ',');
+    if (dev_names.empty()) {
+        throw std::invalid_argument("no devices specified");
+    }
+    if (dev_names.size() == 1 && dev_names[0] == "none") {
+        devices.push_back(nullptr);
+    } else {
+        for (const auto & device : dev_names) {
+            auto * dev = ggml_backend_dev_by_name(device.c_str());
+            if (!dev || ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) {
+                throw std::invalid_argument(string_format("invalid device: %s", device.c_str()));
+            }
+            devices.push_back(dev);
+        }
+        devices.push_back(nullptr);
+    }
+    return devices;
+}
+
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
    auto ctx_arg = common_params_parser_init(params, ex, print_usage);
    const common_params params_org = ctx_arg.params; // the example can modify the default params
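
For context (not part of the diff): the list that parse_device_list() returns is nullptr-terminated, so "none" maps to a single nullptr entry and an explicit device list gets a trailing nullptr sentinel. A minimal sketch of that contract, assuming linkage against ggml and a hypothetical device name "CUDA0":

// sketch.cpp -- illustration only; "CUDA0" is a hypothetical device name
#include <cstdio>
#include <vector>
#include "ggml-backend.h"

int main() {
    ggml_backend_load_all(); // same up-front load the parser performs (see next hunk)

    std::vector<ggml_backend_dev_t> devices;
    if (ggml_backend_dev_t dev = ggml_backend_dev_by_name("CUDA0")) {
        devices.push_back(dev); // "--device CUDA0" -> { CUDA0, nullptr }
    }
    devices.push_back(nullptr); // sentinel; "--device none" -> { nullptr }

    printf("list has %zu entries (the last is the nullptr sentinel)\n", devices.size());
    return 0;
}
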
@@ -324,6 +345,9 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
}

common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
+    // load dynamic backends
+    ggml_backend_load_all();
+
    common_params_context ctx_arg(params);
    ctx_arg.print_usage = print_usage;
    ctx_arg.ex          = ex;
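
A note on ordering (illustration, not part of the diff): with dynamically loaded backends, devices only appear in the registry once the backend libraries are loaded, so calling ggml_backend_load_all() before any option is parsed is what lets --device and --list-devices see the full device list. A tiny sketch of the effect (statically linked backends register themselves regardless):

// illustration only: device count before/after loading dynamic backends
#include <cstdio>
#include "ggml-backend.h"

int main() {
    printf("devices before load_all: %zu\n", ggml_backend_dev_count());
    ggml_backend_load_all(); // discover and load dynamic backend libraries
    printf("devices after  load_all: %zu\n", ggml_backend_dev_count());
    return 0;
}
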
@@ -1312,6 +1336,30 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            else { throw std::invalid_argument("invalid value"); }
        }
    ).set_env("LLAMA_ARG_NUMA"));
+    add_opt(common_arg(
+        {"-dev", "--device"}, "<dev1,dev2,..>",
+        "comma-separated list of devices to use for offloading (none = don't offload)\n"
+        "use --list-devices to see a list of available devices",
+        [](common_params & params, const std::string & value) {
+            params.devices = parse_device_list(value);
+        }
+    ).set_env("LLAMA_ARG_DEVICE"));
+    add_opt(common_arg(
+        {"--list-devices"},
+        "print list of available devices and exit",
+        [](common_params &) {
+            printf("Available devices:\n");
+            for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
+                auto * dev = ggml_backend_dev_get(i);
+                if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
+                    size_t free, total;
+                    ggml_backend_dev_memory(dev, &free, &total);
+                    printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
+                }
+            }
+            exit(0);
+        }
+    ));
    add_opt(common_arg(
        {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
        "number of layers to store in VRAM",
@@ -1336,10 +1384,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            } else if (arg_next == "layer") {
                params.split_mode = LLAMA_SPLIT_MODE_LAYER;
            } else if (arg_next == "row") {
-#ifdef GGML_USE_SYCL
-                fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
-                exit(1);
-#endif // GGML_USE_SYCL
                params.split_mode = LLAMA_SPLIT_MODE_ROW;
            } else {
                throw std::invalid_argument("invalid value");
@@ -2042,6 +2086,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.speculative.n_ctx = value;
        }
    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"-devd", "--device-draft"}, "<dev1,dev2,..>",
+        "comma-separated list of devices to use for offloading the draft model (none = don't offload)\n"
+        "use --list-devices to see a list of available devices",
+        [](common_params & params, const std::string & value) {
+            params.speculative.devices = parse_device_list(value);
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
    add_opt(common_arg(
        {"-ngld", "--gpu-layers-draft", "--n-gpu-layers-draft"}, "N",
        "number of layers to store in VRAM for the draft model",