Commit 554fd57

server : fix mtmd checkpoints (ggml-org#16591)
1 parent fa882fd

2 files changed: +6, -5 lines

tools/server/server.cpp

Lines changed: 3 additions & 3 deletions
@@ -3812,7 +3812,7 @@ struct server_context {
             if (slot.n_past > 0 && slot.n_past < (int) slot.prompt.tokens.size()) {
                 const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id);
                 if (pos_min == -1) {
-                    SLT_ERR(slot, "n_past = %d, cache_tokens.size() = %d, seq_id = %d, pos_min = %d\n", slot.n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min);
+                    SLT_ERR(slot, "n_past = %d, slot.prompt.tokens.size() = %d, seq_id = %d, pos_min = %d\n", slot.n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min);
                     GGML_ABORT("pos_min == -1, but n_past > 0 - should not happen: https://github.com/ggml-org/llama.cpp/pull/13833#discussion_r2116181237");
                 }

@@ -3860,7 +3860,7 @@ struct server_context {
                 }

                 if (pos_min > pos_min_thold) {
-                    SLT_WRN(slot, "n_past = %d, cache_tokens.size() = %d, seq_id = %d, pos_min = %d, n_swa = %d\n", slot.n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min, n_swa);
+                    SLT_WRN(slot, "n_past = %d, slot.prompt.tokens.size() = %d, seq_id = %d, pos_min = %d, n_swa = %d\n", slot.n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min, n_swa);

                     // search for a context checkpoint
                     const auto it = std::find_if(
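The hunk above breaks off at the start of the checkpoint lookup (the std::find_if call is truncated in this diff). For context: when the SWA window has slid past the reusable prefix, the server scans its saved context checkpoints for one it can restore instead of reprocessing the prompt from position 0. Below is a minimal sketch of that kind of scan, with a hypothetical ctx_checkpoint record and find_checkpoint helper; the server's actual types and predicate live in tools/server/server.cpp and are not shown here.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the server's saved-state record.
    struct ctx_checkpoint {
        int32_t pos_min; // first position covered by the saved state
        int32_t pos_max; // last position covered by the saved state
    };

    // Scan newest-to-oldest for a checkpoint that still covers the SWA
    // threshold, so decoding can resume from it rather than from scratch.
    static const ctx_checkpoint * find_checkpoint(
            const std::vector<ctx_checkpoint> & checkpoints, int32_t pos_min_thold) {
        const auto it = std::find_if(checkpoints.rbegin(), checkpoints.rend(),
            [&](const ctx_checkpoint & cp) {
                return cp.pos_min <= pos_min_thold && pos_min_thold <= cp.pos_max;
            });
        return it == checkpoints.rend() ? nullptr : &*it;
    }

This only illustrates the shape of the lookup; the real predicate and checkpoint bookkeeping may differ.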
@@ -4028,7 +4028,7 @@ struct server_context {
                 }
             }

-            // SLT_INF(slot, "new cache_tokens: %s\n", slot.cache_tokens.str().c_str());
+            // SLT_INF(slot, "new slot.prompt.tokens: %s\n", slot.slot.prompt.tokens.str().c_str());

            SLT_INF(slot, "prompt processing progress, n_past = %d, n_tokens = %d, progress = %f\n", slot.n_past, batch.n_tokens, (float) slot.n_past / slot.n_prompt_tokens());


tools/server/utils.hpp

Lines changed: 3 additions & 2 deletions
@@ -1237,9 +1237,10 @@ struct server_tokens {
         // allowed to resize ^ ^
         // disallowed to resize ^ ^ ^
         if (n > 0) {
-            llama_token last_token = tokens[n - 1];
             // make sure we never remove tokens in the middle of an image
-            if (last_token == LLAMA_TOKEN_NULL) {
+            // note that the case where we keep a full image at the end is allowed:
+            //   tokens[n - 1] == LLAMA_TOKEN_NULL && tokens[n] != LLAMA_TOKEN_NULL
+            if (tokens[n - 1] == LLAMA_TOKEN_NULL && tokens[n] == LLAMA_TOKEN_NULL) {
                 find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk
             }
         }
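The rewritten condition above is the functional fix of this commit. In server_tokens, an image chunk occupies a run of LLAMA_TOKEN_NULL placeholder positions (LLAMA_TOKEN_NULL is -1 in llama.h), so a truncation point is suspect only when it lands inside such a run, i.e. both the last kept position and the first removed position are placeholders. The old check looked only at the last kept token, which wrongly rejected keeping a complete image at the very end of the prompt. A self-contained sketch of the boundary test, with a hypothetical cut_may_split_image helper (not the server's API):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using llama_token = int32_t;
    constexpr llama_token LLAMA_TOKEN_NULL = -1; // as in llama.h

    // True when keeping the first n tokens may cut into an image run:
    // the last kept position and the first removed one are both placeholders.
    // Keeping a complete image at the end (tokens[n - 1] == LLAMA_TOKEN_NULL,
    // tokens[n] != LLAMA_TOKEN_NULL) is allowed, which is exactly the case
    // the old last_token-only check sent down the error path.
    static bool cut_may_split_image(const std::vector<llama_token> & tokens, size_t n) {
        if (n == 0 || n >= tokens.size()) {
            return false; // keeping nothing, or keeping everything
        }
        return tokens[n - 1] == LLAMA_TOKEN_NULL && tokens[n] == LLAMA_TOKEN_NULL;
    }

For tokens = {a, NULL, NULL, b} (an image spanning positions 1-2), n = 3 keeps the whole image and passes the new test, while n = 2 trips it and falls through to find_chunk(n - 1). The pre-fix check, looking only at tokens[n - 1], sent the harmless n = 3 case into find_chunk as well, which then threw because position 2 is mid-chunk.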
