@@ -9061,6 +9061,11 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
     // number of cells moved
     uint32_t n_moves = 0;

+    // each move requires 6*n_layer tensors (see build_defrag)
+    //   - source view, destination view, copy operation
+    //   - x2 for keys and values
+    const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer);
+
     // determine which KV cells to move where
     //
     //  cell i moves to ids[i]
@@ -9087,15 +9092,6 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
             nh++;
         }

-        // each move requires 6*n_layer tensors (see build_defrag)
-        //   - source view, destination view, copy operation
-        //   - x2 for keys and values
-        //
-        if (6*(n_moves + nh)*n_layer >= LLAMA_MAX_NODES) {
-            // the graph is too big, we cannot move more cells
-            break;
-        }
-
         uint32_t nf = 0;
         uint32_t is = n_kv - 1;
@@ -9125,11 +9121,19 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
         // are we moving a continuous block of memory?
        bool cont = false;

+        // should we stop searching for the next move?
+        bool stop = false;
+
         // go back and move the nf cells to the hole
         for (; i1 < n_kv; ++i1) {
             auto & cell1 = kv_self.cells[i1];

             if (cell1.is_empty() || ids[i1] != n_kv) {
+                if (n_moves == max_moves) {
+                    stop = true;
+                    break;
+                }
+
                 cont = false;
                 continue;
             }
@@ -9156,6 +9160,10 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
             }
         }

+        if (stop || n_moves == max_moves) {
+            break;
+        }
+
         //LLAMA_LOG_INFO("(tmp log) KV defrag: move [%u, %u) to [%u, %u)\n", is, i1 + 1, i0, i0 + nh);

         i0 += nh - 1;
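A minimal standalone sketch (not the llama.cpp implementation) of the capping logic this diff introduces: each scheduled cell move costs 6*n_layer graph nodes, so the pass computes `max_moves = LLAMA_MAX_NODES/(6*n_layer)` once and stops scheduling further moves when that budget is reached, leaving the remaining cells for a later defrag pass. The `LLAMA_MAX_NODES` value, `n_layer`, and `n_holes` below are illustrative placeholders, not values from a real model.

```c
// Standalone sketch of the per-graph move budget; values are illustrative.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LLAMA_MAX_NODES 8192   // assumed stand-in for the llama.cpp constant

int main(void) {
    const uint32_t n_layer   = 32;                            // hypothetical layer count
    const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer);   // moves that fit in one graph

    const uint32_t n_holes = 1000;   // pretend fragmentation: holes left to fill
    uint32_t n_moves = 0;

    for (uint32_t hole = 0; hole < n_holes; ++hole) {
        // should we stop searching for the next move?
        bool stop = false;

        // stand-in for the inner scan that fills one hole from the end of the cache
        if (n_moves == max_moves) {
            stop = true;       // budget exhausted mid-scan
        } else {
            n_moves++;         // one move scheduled for this hole
        }

        if (stop || n_moves == max_moves) {
            break;             // graph is full; remaining holes wait for the next defrag pass
        }
    }

    printf("scheduled %u moves (budget %u)\n", n_moves, max_moves);
    return 0;
}
```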