
Commit e987482

context : revert llama_batch_allocr position change
ggml-ci
1 parent: a91b15f

2 files changed: 4 additions, 2 deletions

src/llama-batch.cpp (3 additions, 1 deletion)

@@ -1,5 +1,6 @@
 #include "llama-batch.h"
 
+#include <cassert>
 #include <cstring>
 #include <algorithm>
 
@@ -281,9 +282,10 @@ llama_batch_allocr::llama_batch_allocr(struct llama_batch in_batch, llama_pos p0
     batch = in_batch;
     GGML_ASSERT(batch.n_tokens > 0);
     if (!batch.pos) {
+        assert(p0 >= 0);
         pos.resize(batch.n_tokens);
         for (int32_t i = 0; i < batch.n_tokens; i++) {
-            pos[i] = p0 + i + 1;
+            pos[i] = p0 + i;
         }
         batch.pos = pos.data();
     }
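
Net effect of this half of the revert: when the incoming batch carries no explicit positions, llama_batch_allocr now fills them starting at p0 rather than p0 + 1, and the new assert requires a non-negative p0. Below is a minimal standalone sketch of that auto-fill loop; toy_batch and fill_positions are illustrative stand-ins, not the real llama.cpp types.

// A minimal sketch of the auto-fill behavior after this change.
// toy_batch and fill_positions are hypothetical stand-ins, not the real llama.cpp API.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

struct toy_batch {
    int32_t         n_tokens;
    const int32_t * pos; // nullptr means "let the allocator assign positions"
};

// Mirrors the loop in llama_batch_allocr: positions now start at p0, not p0 + 1.
static std::vector<int32_t> fill_positions(const toy_batch & batch, int32_t p0) {
    assert(p0 >= 0); // the new assert: a valid starting position is required
    std::vector<int32_t> pos(batch.n_tokens);
    for (int32_t i = 0; i < batch.n_tokens; i++) {
        pos[i] = p0 + i;
    }
    return pos;
}

int main() {
    toy_batch batch = { 4, nullptr };
    // The caller is expected to pass the first free position (see the
    // llama-context.cpp change below), e.g. 7 if the last cached position is 6.
    for (int32_t p : fill_positions(batch, 7)) {
        printf("%d ", p); // prints: 7 8 9 10
    }
    printf("\n");
    return 0;
}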

src/llama-context.cpp (1 addition, 1 deletion)

@@ -867,7 +867,7 @@ int llama_context::decode(llama_batch & inp_batch) {
     llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
 
     // temporary allocate memory for the input batch if needed
-    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : kv_self->seq_pos_max(0));
+    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : kv_self->seq_pos_max(0) + 1);
 
     const llama_batch & batch = batch_allocr.batch;
 
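
The matching change on the caller side moves the "+ 1" into llama_context::decode: when inp_batch.pos is provided, p0 is unused and passed as -1; otherwise decoding starts one past the highest position already stored in the KV cache, so the generated positions come out the same as before the revert. A small sketch of that convention follows; seq_pos_max_stub is a hypothetical stand-in for kv_self->seq_pos_max(0).

// Sketch of the call-site convention after this commit; the KV-cache query is
// stubbed out, so the values here are hypothetical.
#include <cstdint>
#include <cstdio>

// Stand-in for kv_self->seq_pos_max(0): highest position stored for sequence 0.
static int32_t seq_pos_max_stub() {
    return 6;
}

int main() {
    const int32_t * inp_pos = nullptr; // the incoming batch carries no positions
    // If positions are provided, p0 is ignored (-1); otherwise start one past
    // the last cached position, so the "+ 1" now lives at the caller.
    const int32_t p0 = inp_pos ? -1 : seq_pos_max_stub() + 1;
    printf("p0 = %d\n", p0); // p0 = 7: same first auto-assigned position as before the revert
    return 0;
}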
