Skip to content

Commit 283ab2f

Browse files
committed
attempt to reduce the impact of a worst-case scenario
1 parent bcebd7d commit 283ab2f

File tree

1 file changed

+17
-9
lines changed

1 file changed

+17
-9
lines changed

llama.cpp

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -9061,6 +9061,11 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
90619061
// number of cells moved
90629062
uint32_t n_moves = 0;
90639063

9064+
// each move requires 6*n_layer tensors (see build_defrag)
9065+
// - source view, destination view, copy operation
9066+
// - x2 for keys and values
9067+
const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer);
9068+
90649069
// determine which KV cells to move where
90659070
//
90669071
// cell i moves to ids[i]
@@ -9087,15 +9092,6 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
90879092
nh++;
90889093
}
90899094

9090-
// each move requires 6*n_layer tensors (see build_defrag)
9091-
// - source view, destination view, copy operation
9092-
// - x2 for keys and values
9093-
//
9094-
if (6*(n_moves + nh)*n_layer >= LLAMA_MAX_NODES) {
9095-
// the graph is too big, we cannot move more cells
9096-
break;
9097-
}
9098-
90999095
uint32_t nf = 0;
91009096
uint32_t is = n_kv - 1;
91019097

@@ -9125,11 +9121,19 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
91259121
// are we moving a contiguous block of memory?
91269122
bool cont = false;
91279123

9124+
// should we stop searching for the next move?
9125+
bool stop = false;
9126+
91289127
// go back and move the nf cells to the hole
91299128
for (; i1 < n_kv; ++i1) {
91309129
auto & cell1 = kv_self.cells[i1];
91319130

91329131
if (cell1.is_empty() || ids[i1] != n_kv) {
9132+
if (n_moves == max_moves) {
9133+
stop = true;
9134+
break;
9135+
}
9136+
91339137
cont = false;
91349138
continue;
91359139
}
@@ -9156,6 +9160,10 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
91569160
}
91579161
}
91589162

9163+
if (stop || n_moves == max_moves) {
9164+
break;
9165+
}
9166+
91599167
//LLAMA_LOG_INFO("(tmp log) KV defrag: move [%u, %u) to [%u, %u)\n", is, i1 + 1, i0, i0 + nh);
91609168

91619169
i0 += nh - 1;

0 commit comments

Comments
 (0)