Commit 134bc38

llama-bench : log benchmark progress (#9287)
* llama-bench : add optional progress messages
1 parent 815b1fb

1 file changed: +26 -2 lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 26 additions & 2 deletions
@@ -249,6 +249,7 @@ struct cmd_params {
     ggml_sched_priority prio;
     int delay;
     bool verbose;
+    bool progress;
     output_formats output_format;
     output_formats output_format_stderr;
 };
@@ -280,6 +281,7 @@ static const cmd_params cmd_params_defaults = {
     /* prio                 */ GGML_SCHED_PRIO_NORMAL,
     /* delay                */ 0,
     /* verbose              */ false,
+    /* progress             */ false,
     /* output_format        */ MARKDOWN,
     /* output_format_stderr */ NONE,
 };
@@ -319,6 +321,7 @@ static void print_usage(int /* argc */, char ** argv) {
     printf("  -o, --output <csv|json|jsonl|md|sql>      (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
     printf("  -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
     printf("  -v, --verbose                             (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
+    printf("  --progress                                (default: %s)\n", cmd_params_defaults.progress ? "1" : "0");
     printf("\n");
     printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
 }
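With progress defaulting to false, the help text produced by these printf calls would read, in excerpt (values follow the defaults above):

      -v, --verbose                             (default: 0)
      --progress                                (default: 0)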
@@ -364,6 +367,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
     params.numa = cmd_params_defaults.numa;
     params.prio = cmd_params_defaults.prio;
     params.delay = cmd_params_defaults.delay;
+    params.progress = cmd_params_defaults.progress;
 
     for (int i = 1; i < argc; i++) {
         arg = argv[i];
@@ -616,6 +620,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
             invalid_param = !output_format_from_str(argv[i], params.output_format_stderr);
         } else if (arg == "-v" || arg == "--verbose") {
             params.verbose = true;
+        } else if (arg == "--progress") {
+            params.progress = true;
         } else {
             invalid_param = true;
             break;
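The new branch mirrors the existing -v/--verbose handling: a bare flag that takes no value and simply sets a boolean. A minimal standalone sketch of that idiom, for context only (the struct, messages, and error handling here are illustrative, not this commit's code):

    #include <cstdio>
    #include <cstring>

    struct opts {
        bool verbose  = false;
        bool progress = false;
    };

    int main(int argc, char ** argv) {
        opts o;
        for (int i = 1; i < argc; i++) {
            if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) {
                o.verbose = true;            // bare flag: consumes no value
            } else if (!strcmp(argv[i], "--progress")) {
                o.progress = true;
            } else {
                fprintf(stderr, "unknown argument: %s\n", argv[i]);
                return 1;
            }
        }
        fprintf(stderr, "verbose=%d progress=%d\n", o.verbose, o.progress);
        return 0;
    }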
@@ -1523,7 +1529,13 @@ int main(int argc, char ** argv) {
     llama_model * lmodel = nullptr;
     const cmd_params_instance * prev_inst = nullptr;
 
+    int params_idx = 0;
+    auto params_count = params_instances.size();
     for (const auto & inst : params_instances) {
+        params_idx ++;
+        if (params.progress) {
+            fprintf(stderr, "llama-bench: benchmark %d/%ld: starting\n", params_idx, params_count);
+        }
         // keep the same model between tests when possible
         if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
             if (lmodel) {
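With --progress set, each parameter combination announces itself on stderr before any model loading or testing begins; for, say, the third of eight combinations the line would read (counts illustrative):

    llama-bench: benchmark 3/8: starting

Note that params_count is the size_t returned by params_instances.size() but is printed with %ld, which assumes size_t and long share a width, as they do on typical LP64 platforms.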
@@ -1556,7 +1568,7 @@ int main(int argc, char ** argv) {
 
         struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
         if (!parse_cpu_mask(t.cpu_mask, tpp.cpumask)) {
-            LOG_TEE("%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
+            fprintf(stderr, "%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
             exit(1);
         }
         tpp.strict_cpu = t.cpu_strict;
@@ -1565,18 +1577,24 @@ int main(int argc, char ** argv) {
 
         struct ggml_threadpool* threadpool = ggml_threadpool_new(&tpp);
         if (!threadpool) {
-            LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
+            fprintf(stderr, "%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
             exit(1);
         }
 
         llama_attach_threadpool(ctx, threadpool, NULL);
 
         // warmup run
         if (t.n_prompt > 0) {
+            if (params.progress) {
+                fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count);
+            }
             //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
             test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
         }
         if (t.n_gen > 0) {
+            if (params.progress) {
+                fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count);
+            }
             test_gen(ctx, 1, 0, t.n_threads);
         }
 
@@ -1586,9 +1604,15 @@ int main(int argc, char ** argv) {
             uint64_t t_start = get_time_ns();
 
             if (t.n_prompt > 0) {
+                if (params.progress) {
+                    fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps);
+                }
                 test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
             }
             if (t.n_gen > 0) {
+                if (params.progress) {
+                    fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps);
+                }
                 test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
             }
 
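Putting the pieces together, one way to exercise the new flag (model path illustrative):

    ./llama-bench -m models/7B/ggml-model-q4_0.gguf --progress

For a single parameter combination that runs both a prompt and a generation test with two repetitions, the format strings above imply a stderr stream roughly like:

    llama-bench: benchmark 1/1: starting
    llama-bench: benchmark 1/1: warmup prompt run
    llama-bench: benchmark 1/1: warmup generation run
    llama-bench: benchmark 1/1: prompt run 1/2
    llama-bench: benchmark 1/1: generation run 1/2
    llama-bench: benchmark 1/1: prompt run 2/2
    llama-bench: benchmark 1/1: generation run 2/2

Because the messages go to stderr, they stay out of the benchmark table on stdout and can be captured separately, e.g. with 2>progress.log.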