Add docker protocol support for llama-server model loading

ericcurtin · ericcurtin · commit 85ad51609c45 · 2025-09-09T18:29:50.000+01:00
To pull and run models via:

llama-server -d ai/smollm2:135M-Q4_K_M

Signed-off-by: Eric Curtin <ericcurtin17@gmail.com>
diff --git a/common/arg.cpp b/common/arg.cpp
@@ -745,6 +745,97 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
 
 #endif // LLAMA_USE_CURL
 
+//
+// Docker registry functions
+//
+
+// Fetch a pull-scoped bearer token for `repo` from the auth.docker.io token endpoint.
+// Throws std::runtime_error on a non-200 reply or when the JSON reply has no "token" key.
+static std::string common_docker_get_token(const std::string & repo) {
+    const std::string token_url =
+        "https://auth.docker.io/token?service=registry.docker.io&scope=repository:" + repo + ":pull";
+    common_remote_params request_opts;  // default-constructed request options
+
+    const auto reply     = common_remote_get_content(token_url, request_opts);
+    const long http_code = reply.first;
+    if (http_code != 200) {
+        throw std::runtime_error("Failed to get Docker registry token, HTTP code: " + std::to_string(http_code));
+    }
+
+    const auto body = nlohmann::ordered_json::parse(std::string(reply.second.begin(), reply.second.end()));
+    if (!body.contains("token")) {
+        throw std::runtime_error("Docker registry token response missing 'token' field");
+    }
+    return body["token"].get<std::string>();
+}
+
+// Resolve a Docker Hub model reference ("<repo>[:<tag>]"; a missing tag means "latest") to a local file:
+// get a pull token, fetch the manifest, take the first layer whose mediaType contains "gguf" and
+// download that blob to the path returned by fs_get_cache_file("<repo with '/' -> '_'>_<tag>.gguf").
+// Returns that local path. Throws std::runtime_error on a non-200 manifest reply, a manifest without a
+// GGUF layer or a failed download; other exceptions (e.g. from JSON parsing) are logged and rethrown too.
+static std::string common_docker_resolve_model(const std::string & docker) {
+    // Parse ai/smollm2:135M-Q4_K_M
+    const size_t      colon_pos = docker.find(':');
+    const std::string repo      = docker.substr(0, colon_pos);  // a length of npos means "to the end"
+    const std::string tag       = colon_pos == std::string::npos ? "latest" : docker.substr(colon_pos + 1);
+
+    LOG_INF("Downloading Docker Model: %s:%s\n", repo.c_str(), tag.c_str());
+    try {
+        std::string token = common_docker_get_token(repo);  // Get authentication token
+
+        // Get manifest
+        std::string          manifest_url = "https://registry-1.docker.io/v2/" + repo + "/manifests/" + tag;
+        common_remote_params manifest_params;
+        manifest_params.headers.push_back("Authorization: Bearer " + token);
+        manifest_params.headers.push_back(
+            "Accept: application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json");
+        auto manifest_res = common_remote_get_content(manifest_url, manifest_params);
+        if (manifest_res.first != 200) {
+            throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(manifest_res.first));
+        }
+
+        std::string            manifest_str(manifest_res.second.begin(), manifest_res.second.end());
+        nlohmann::ordered_json manifest = nlohmann::ordered_json::parse(manifest_str);
+        std::string            gguf_digest;  // Find the GGUF layer
+        if (manifest.contains("layers")) {
+            for (const auto & layer : manifest["layers"]) {
+                // The substring match also covers the exact type "application/vnd.docker.ai.gguf.v3".
+                if (layer.contains("mediaType") &&
+                    layer["mediaType"].get<std::string>().find("gguf") != std::string::npos) {
+                    // at() throws when "digest" is absent; operator[] on a const json would be undefined
+                    gguf_digest = layer.at("digest").get<std::string>();
+                    break;
+                }
+            }
+        }
+
+        if (gguf_digest.empty()) {
+            throw std::runtime_error("No GGUF layer found in Docker manifest");
+        }
+
+        // Prepare local filename
+        std::string model_filename = repo;
+        std::replace(model_filename.begin(), model_filename.end(), '/', '_');
+        model_filename += "_" + tag + ".gguf";
+        std::string local_path = fs_get_cache_file(model_filename);
+
+        // Download the blob, passing the bearer token obtained above.
+        // NOTE(review): the meaning of the trailing `false` is defined by common_download_file_single; confirm.
+        std::string blob_url = "https://registry-1.docker.io/v2/" + repo + "/blobs/" + gguf_digest;
+        if (!common_download_file_single(blob_url, local_path, token, false)) {
+            throw std::runtime_error("Failed to download Docker blob");
+        }
+
+        LOG_INF("Downloaded Docker Model to: %s\n", local_path.c_str());
+        return local_path;
+    } catch (const std::exception & e) {
+        // log here, then rethrow the same exception so the caller still sees the failure
+        LOG_ERR("Docker Model download failed: %s\n", e.what());
+        throw;
+    }
+}
+
 //
 // utils
 //
@@ -795,7 +886,9 @@ static handle_model_result common_params_handle_model(
     handle_model_result result;
     // handle pre-fill default model path and url based on hf_repo and hf_file
     {
-        if (!model.hf_repo.empty()) {
+        if (!model.docker.empty()) {  // Handle Docker URLs by resolving them to local paths
+            model.path = common_docker_resolve_model(model.docker);
+        } else if (!model.hf_repo.empty()) {
             // short-hand to avoid specifying --hf-file -> default it to --model
             if (model.hf_file.empty()) {
                 if (model.path.empty()) {
@@ -2636,6 +2729,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.model.url = value;
         }
     ).set_env("LLAMA_ARG_MODEL_URL"));
+    add_opt(common_arg(
+        { "-d", "-dr", "--docker", "--docker-repo" }, "<repo>/<model>[:quant]",
+        "Docker Hub model repository; quant is optional, default to latest.\n"
+        "example: ai/smollm2:135M-Q4_K_M\n"
+        "(default: unused)",
+        [](common_params & params, const std::string & value) { params.model.docker = value; })
+    .set_env("LLAMA_ARG_DOCKER"));
     add_opt(common_arg(
         {"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
         "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
diff --git a/common/common.h b/common/common.h
@@ -197,6 +197,7 @@ struct common_params_model {
     std::string url     = ""; // model url to download                                      // NOLINT
     std::string hf_repo = ""; // HF repo                                                    // NOLINT
     std::string hf_file = ""; // HF file                                                    // NOLINT
+    std::string docker  = ""; // Docker Model url to download                               // NOLINT
 };
 
 struct common_params_speculative {