Skip to content

Commit 85ad516

Browse files
committed
Add docker protocol support for llama-server model loading
To pull and run models via: llama-server -d ai/smollm2:135M-Q4_K_M

Signed-off-by: Eric Curtin <[email protected]>
1 parent 4f63cd7 commit 85ad516

File tree

2 files changed

+102
-1
lines changed

2 files changed

+102
-1
lines changed

common/arg.cpp

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,97 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
745745

746746
#endif // LLAMA_USE_CURL
747747

748+
//
749+
// Docker registry functions
750+
//
751+
752+
static std::string common_docker_get_token(const std::string & repo) {
753+
std::string url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:" + repo + ":pull";
754+
755+
common_remote_params params;
756+
auto res = common_remote_get_content(url, params);
757+
758+
if (res.first != 200) {
759+
throw std::runtime_error("Failed to get Docker registry token, HTTP code: " + std::to_string(res.first));
760+
}
761+
762+
std::string response_str(res.second.begin(), res.second.end());
763+
nlohmann::ordered_json response = nlohmann::ordered_json::parse(response_str);
764+
765+
if (!response.contains("token")) {
766+
throw std::runtime_error("Docker registry token response missing 'token' field");
767+
}
768+
769+
return response["token"].get<std::string>();
770+
}
771+
772+
static std::string common_docker_resolve_model(const std::string & docker) {
773+
// Parse ai/smollm2:135M-Q4_K_M
774+
size_t colon_pos = docker.find(':');
775+
std::string repo, tag;
776+
if (colon_pos != std::string::npos) {
777+
repo = docker.substr(0, colon_pos);
778+
tag = docker.substr(colon_pos + 1);
779+
} else {
780+
repo = docker;
781+
tag = "latest";
782+
}
783+
784+
LOG_INF("Downloading Docker Model: %s:%s\n", repo.c_str(), tag.c_str());
785+
try {
786+
std::string token = common_docker_get_token(repo); // Get authentication token
787+
788+
// Get manifest
789+
std::string manifest_url = "https://registry-1.docker.io/v2/" + repo + "/manifests/" + tag;
790+
common_remote_params manifest_params;
791+
manifest_params.headers.push_back("Authorization: Bearer " + token);
792+
manifest_params.headers.push_back(
793+
"Accept: application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json");
794+
auto manifest_res = common_remote_get_content(manifest_url, manifest_params);
795+
if (manifest_res.first != 200) {
796+
throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(manifest_res.first));
797+
}
798+
799+
std::string manifest_str(manifest_res.second.begin(), manifest_res.second.end());
800+
nlohmann::ordered_json manifest = nlohmann::ordered_json::parse(manifest_str);
801+
std::string gguf_digest; // Find the GGUF layer
802+
if (manifest.contains("layers")) {
803+
for (const auto & layer : manifest["layers"]) {
804+
if (layer.contains("mediaType")) {
805+
std::string media_type = layer["mediaType"].get<std::string>();
806+
if (media_type == "application/vnd.docker.ai.gguf.v3" ||
807+
media_type.find("gguf") != std::string::npos) {
808+
gguf_digest = layer["digest"].get<std::string>();
809+
break;
810+
}
811+
}
812+
}
813+
}
814+
815+
if (gguf_digest.empty()) {
816+
throw std::runtime_error("No GGUF layer found in Docker manifest");
817+
}
818+
819+
// Prepare local filename
820+
std::string model_filename = repo;
821+
std::replace(model_filename.begin(), model_filename.end(), '/', '_');
822+
model_filename += "_" + tag + ".gguf";
823+
std::string local_path = fs_get_cache_file(model_filename);
824+
825+
// Download the blob using common_download_file_single with is_docker=true
826+
std::string blob_url = "https://registry-1.docker.io/v2/" + repo + "/blobs/" + gguf_digest;
827+
if (!common_download_file_single(blob_url, local_path, token, false)) {
828+
throw std::runtime_error("Failed to download Docker blob");
829+
}
830+
831+
LOG_INF("Downloaded Docker Model to: %s\n", local_path.c_str());
832+
return local_path;
833+
} catch (const std::exception & e) {
834+
LOG_ERR("Docker Model download failed: %s\n", e.what());
835+
throw;
836+
}
837+
}
838+
748839
//
749840
// utils
750841
//
@@ -795,7 +886,9 @@ static handle_model_result common_params_handle_model(
795886
handle_model_result result;
796887
// handle pre-fill default model path and url based on hf_repo and hf_file
797888
{
798-
if (!model.hf_repo.empty()) {
889+
if (!model.docker.empty()) { // Handle Docker URLs by resolving them to local paths
890+
model.path = common_docker_resolve_model(model.docker);
891+
} else if (!model.hf_repo.empty()) {
799892
// short-hand to avoid specifying --hf-file -> default it to --model
800893
if (model.hf_file.empty()) {
801894
if (model.path.empty()) {
@@ -2636,6 +2729,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
26362729
params.model.url = value;
26372730
}
26382731
).set_env("LLAMA_ARG_MODEL_URL"));
2732+
add_opt(common_arg(
2733+
{ "-d", "-dr", "--docker", "--docker-repo" }, "<repo>/<model>[:quant]",
2734+
"Docker Hub model repository; quant is optional, default to latest.\n"
2735+
"example: ai/smollm2:135M-Q4_K_M\n"
2736+
"(default: unused)",
2737+
[](common_params & params, const std::string & value) { params.model.docker = value; })
2738+
.set_env("LLAMA_ARG_DOCKER"));
26392739
add_opt(common_arg(
26402740
{"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
26412741
"Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ struct common_params_model {
197197
std::string url = ""; // model url to download // NOLINT
198198
std::string hf_repo = ""; // HF repo // NOLINT
199199
std::string hf_file = ""; // HF file // NOLINT
200+
std::string docker = ""; // Docker Model url to download // NOLINT
200201
};
201202

202203
struct common_params_speculative {

0 commit comments

Comments
 (0)