You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
// if you get here, it means the KV cache is full - try increasing it via the context size
2189
-
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
2193
+
LOG_ERROR("failed to decode the batch: KV cache is full - try increasing it via the context size", {
2194
+
{"i", i},
2195
+
{"n_batch", n_batch}, // log the batch size in use, not the decode return code (already reported under "ret")
2196
+
{"ret", ret},
2197
+
});
2190
2198
for (auto & slot : slots) {
2191
2199
slot.state = SLOT_STATE_PROCESSING;
2192
2200
slot.command = SLOT_COMMAND_NONE;
@@ -2196,12 +2204,16 @@ struct server_context {
2196
2204
break; // break loop of n_batch
2197
2205
}
2198
2206
2199
-
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
2200
-
2201
2207
// retry with half the batch size to try to find a free slot in the KV cache
2202
2208
n_batch /= 2;
2203
2209
i -= n_batch;
2204
2210
2211
+
LOG_WARNING("failed to find free space in the KV cache, retrying with smaller batch size - try increasing it via the context size or enable defragmentation", {
0 commit comments