@@ -249,6 +249,7 @@ struct cmd_params {
249
249
ggml_sched_priority prio;
250
250
int delay;
251
251
bool verbose;
252
+ bool progress;
252
253
output_formats output_format;
253
254
output_formats output_format_stderr;
254
255
};
@@ -280,6 +281,7 @@ static const cmd_params cmd_params_defaults = {
280
281
/* prio */ GGML_SCHED_PRIO_NORMAL,
281
282
/* delay */ 0 ,
282
283
/* verbose */ false ,
284
+ /* progress */ false ,
283
285
/* output_format */ MARKDOWN,
284
286
/* output_format_stderr */ NONE,
285
287
};
@@ -319,6 +321,7 @@ static void print_usage(int /* argc */, char ** argv) {
319
321
printf (" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n " , output_format_str (cmd_params_defaults.output_format ));
320
322
printf (" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n " , output_format_str (cmd_params_defaults.output_format_stderr ));
321
323
printf (" -v, --verbose (default: %s)\n " , cmd_params_defaults.verbose ? " 1" : " 0" );
324
+ printf (" --progress (default: %s)\n " , cmd_params_defaults.progress ? " 1" : " 0" );
322
325
printf (" \n " );
323
326
printf (" Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n " );
324
327
}
@@ -364,6 +367,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
364
367
params.numa = cmd_params_defaults.numa ;
365
368
params.prio = cmd_params_defaults.prio ;
366
369
params.delay = cmd_params_defaults.delay ;
370
+ params.progress = cmd_params_defaults.progress ;
367
371
368
372
for (int i = 1 ; i < argc; i++) {
369
373
arg = argv[i];
@@ -616,6 +620,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
616
620
invalid_param = !output_format_from_str (argv[i], params.output_format_stderr );
617
621
} else if (arg == " -v" || arg == " --verbose" ) {
618
622
params.verbose = true ;
623
+ } else if (arg == " --progress" ) {
624
+ params.progress = true ;
619
625
} else {
620
626
invalid_param = true ;
621
627
break ;
@@ -1523,7 +1529,13 @@ int main(int argc, char ** argv) {
1523
1529
llama_model * lmodel = nullptr ;
1524
1530
const cmd_params_instance * prev_inst = nullptr ;
1525
1531
1532
+ int params_idx = 0 ;
1533
+ auto params_count = params_instances.size ();
1526
1534
for (const auto & inst : params_instances) {
1535
+ params_idx ++;
1536
+ if (params.progress ) {
1537
+ fprintf (stderr, " llama-bench: benchmark %d/%zu: starting\n " , params_idx, params_count); // params_count comes from params_instances.size() (unsigned size type), so it needs %zu rather than %ld
1538
+ }
1527
1539
// keep the same model between tests when possible
1528
1540
if (!lmodel || !prev_inst || !inst.equal_mparams (*prev_inst)) {
1529
1541
if (lmodel) {
@@ -1556,7 +1568,7 @@ int main(int argc, char ** argv) {
1556
1568
1557
1569
struct ggml_threadpool_params tpp = ggml_threadpool_params_default (t.n_threads );
1558
1570
if (!parse_cpu_mask (t.cpu_mask , tpp.cpumask )) {
1559
- LOG_TEE ( " %s: failed to parse cpu-mask: %s\n " , __func__, t.cpu_mask .c_str ());
1571
+ fprintf (stderr, " %s: failed to parse cpu-mask: %s\n " , __func__, t.cpu_mask .c_str ());
1560
1572
exit (1 );
1561
1573
}
1562
1574
tpp.strict_cpu = t.cpu_strict ;
@@ -1565,18 +1577,24 @@ int main(int argc, char ** argv) {
1565
1577
1566
1578
struct ggml_threadpool * threadpool = ggml_threadpool_new (&tpp);
1567
1579
if (!threadpool) {
1568
- LOG_TEE ( " %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1580
+ fprintf (stderr, " %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1569
1581
exit (1 );
1570
1582
}
1571
1583
1572
1584
llama_attach_threadpool (ctx, threadpool, NULL );
1573
1585
1574
1586
// warmup run
1575
1587
if (t.n_prompt > 0 ) {
1588
+ if (params.progress ) {
1589
+ fprintf (stderr, " llama-bench: benchmark %d/%zu: warmup prompt run\n " , params_idx, params_count); // params_count comes from params_instances.size() (unsigned size type), so it needs %zu rather than %ld
1590
+ }
1576
1591
// test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
1577
1592
test_prompt (ctx, t.n_prompt , 0 , t.n_batch , t.n_threads );
1578
1593
}
1579
1594
if (t.n_gen > 0 ) {
1595
+ if (params.progress ) {
1596
+ fprintf (stderr, " llama-bench: benchmark %d/%zu: warmup generation run\n " , params_idx, params_count); // params_count comes from params_instances.size() (unsigned size type), so it needs %zu rather than %ld
1597
+ }
1580
1598
test_gen (ctx, 1 , 0 , t.n_threads );
1581
1599
}
1582
1600
@@ -1586,9 +1604,15 @@ int main(int argc, char ** argv) {
1586
1604
uint64_t t_start = get_time_ns ();
1587
1605
1588
1606
if (t.n_prompt > 0 ) {
1607
+ if (params.progress ) {
1608
+ fprintf (stderr, " llama-bench: benchmark %d/%zu: prompt run %d/%d\n " , params_idx, params_count, i + 1 , params.reps ); // params_count comes from params_instances.size() (unsigned size type), so it needs %zu rather than %ld
1609
+ }
1589
1610
test_prompt (ctx, t.n_prompt , 0 , t.n_batch , t.n_threads );
1590
1611
}
1591
1612
if (t.n_gen > 0 ) {
1613
+ if (params.progress ) {
1614
+ fprintf (stderr, " llama-bench: benchmark %d/%zu: generation run %d/%d\n " , params_idx, params_count, i + 1 , params.reps ); // params_count comes from params_instances.size() (unsigned size type), so it needs %zu rather than %ld
1615
+ }
1592
1616
test_gen (ctx, t.n_gen , t.n_prompt , t.n_threads );
1593
1617
}
1594
1618
0 commit comments