Skip to content

Commit 2d8f549

Browse files
phymbert authored and tybalex committed
server : coherent log output for KV cache full (ggml-org#6637)
1 parent 5125684 commit 2d8f549

File tree

1 file changed

+17
-5
lines changed

1 file changed

+17
-5
lines changed

examples/server/server.cpp

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1083,7 +1083,7 @@ struct server_context {
10831083
};
10841084

10851085
if (llama_decode(ctx, batch_view) != 0) {
1086-
LOG_TEE("%s: llama_decode() failed\n", __func__);
1086+
LOG_ERROR("llama_decode() failed", {});
10871087
return;
10881088
}
10891089
}
@@ -1281,7 +1281,11 @@ struct server_context {
12811281
}
12821282

12831283
void send_error(const int id_task, const int id_multi, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
1284-
LOG_TEE("task %i - error: %s\n", id_task, error.c_str());
1284+
LOG_ERROR("task error", {
1285+
{"id_multi", id_multi},
1286+
{"id_task", id_task},
1287+
{"error", error},
1288+
});
12851289

12861290
server_task_result res;
12871291
res.id = id_task;
@@ -2186,7 +2190,11 @@ struct server_context {
21862190
if (ret != 0) {
21872191
if (n_batch == 1 || ret < 0) {
21882192
// if you get here, it means the KV cache is full - try increasing it via the context size
2189-
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
2193+
LOG_ERROR("failed to decode the batch: KV cache is full - try increasing it via the context size", {
2194+
{"i", i},
2195+
{"n_batch", ret},
2196+
{"ret", ret},
2197+
});
21902198
for (auto & slot : slots) {
21912199
slot.state = SLOT_STATE_PROCESSING;
21922200
slot.command = SLOT_COMMAND_NONE;
@@ -2196,12 +2204,16 @@ struct server_context {
21962204
break; // break loop of n_batch
21972205
}
21982206

2199-
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
2200-
22012207
// retry with half the batch size to try to find a free slot in the KV cache
22022208
n_batch /= 2;
22032209
i -= n_batch;
22042210

2211+
LOG_WARNING("failed to find free space in the KV cache, retrying with smaller batch size - try increasing it via the context size or enable defragmentation", {
2212+
{"i", i},
2213+
{"n_batch", n_batch},
2214+
{"ret", ret},
2215+
});
2216+
22052217
continue; // continue loop of n_batch
22062218
}
22072219

0 commit comments

Comments
 (0)