@@ -901,17 +901,17 @@ int llama_context::decode(const llama_batch & batch_inp) {
     const int64_t n_embd = hparams.n_embd;
 
     // when computing embeddings, all tokens are output
-    const bool embd_all = cparams.embeddings;
+    const bool output_all = cparams.embeddings;
 
-    if (!batch_allocr->init(batch_inp, vocab, memory.get(), n_embd, embd_all)) {
+    if (!batch_allocr->init(batch_inp, vocab, memory.get(), n_embd, output_all)) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return -1;
     }
 
     const uint32_t n_tokens_all  = batch_allocr->get_n_tokens();
     const uint32_t n_outputs_all = batch_allocr->get_n_outputs();
 
-    if (embd_all) {
+    if (output_all) {
         // require that all tokens are output
         if (n_outputs_all != n_tokens_all) {
             LLAMA_LOG_ERROR("%s: pooled embedding requires that all tokens are output (n_outputs_all = %d, n_tokens_all = %d)\n",
@@ -940,7 +940,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
     llama_memory_state_ptr mstate;
 
     while (true) {
-        mstate = memory->init_batch(batch_allocr.get(), cparams.n_ubatch, embd_all);
+        mstate = memory->init_batch(batch_allocr.get(), cparams.n_ubatch, output_all);
         if (!mstate) {
             return -2;
         }
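
For context, a rough caller-side sketch (not part of this commit) of the path the renamed flag controls: with cparams.embeddings enabled, output_all becomes true in decode() and every token of the batch must be output, which is what the pooled-embedding check above enforces. The model path and token ids below are placeholders, and the llama.cpp C API signatures (llama_model_load_from_file, llama_init_from_model, llama_batch_get_one, llama_get_embeddings_seq) can vary between versions, so treat this as illustrative only.

// Sketch only: exercises the output_all path by enabling embeddings.
// Placeholder model path and token ids; check the API against your llama.h.
#include "llama.h"

#include <cstdio>
#include <vector>

int main() {
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams); // placeholder path
    if (!model) {
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    cparams.embeddings   = true;                    // -> output_all == true in llama_context::decode()
    cparams.pooling_type = LLAMA_POOLING_TYPE_MEAN; // pooled embedding per sequence

    llama_context * ctx = llama_init_from_model(model, cparams);
    if (!ctx) {
        return 1;
    }

    // Illustrative token ids; in practice they come from llama_tokenize().
    std::vector<llama_token> tokens = { 1, 2, 3, 4 };
    llama_batch batch = llama_batch_get_one(tokens.data(), (int32_t) tokens.size());

    // With embeddings enabled, decode() requires n_outputs_all == n_tokens_all.
    if (llama_decode(ctx, batch) != 0) {
        fprintf(stderr, "decode failed\n");
        return 1;
    }

    const float * emb = llama_get_embeddings_seq(ctx, 0); // pooled embedding for sequence 0
    if (emb) {
        printf("first embedding component: %f\n", emb[0]);
    }

    llama_free(ctx);
    llama_model_free(model);
    return 0;
}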