@@ -249,6 +249,7 @@ struct cmd_params {
     ggml_sched_priority prio;
     int delay;
     bool verbose;
+    bool progress;
     output_formats output_format;
     output_formats output_format_stderr;
 };
@@ -280,6 +281,7 @@ static const cmd_params cmd_params_defaults = {
     /* prio                 */ GGML_SCHED_PRIO_NORMAL,
     /* delay                */ 0,
     /* verbose              */ false,
+    /* progress             */ false,
     /* output_format        */ MARKDOWN,
     /* output_format_stderr */ NONE,
 };
@@ -319,6 +321,7 @@ static void print_usage(int /* argc */, char ** argv) {
     printf("  -o, --output <csv|json|jsonl|md|sql>      (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
     printf("  -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
     printf("  -v, --verbose                             (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
+    printf("  --progress                                (default: %s)\n", cmd_params_defaults.progress ? "1" : "0");
     printf("\n");
     printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
 }
@@ -616,6 +619,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
             invalid_param = !output_format_from_str(argv[i], params.output_format_stderr);
         } else if (arg == "-v" || arg == "--verbose") {
             params.verbose = true;
+        } else if (arg == "--progress") {
+            params.progress = true;
         } else {
             invalid_param = true;
             break;
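The new switch takes no argument and simply flips the boolean. A hypothetical invocation (the model path below is illustrative, not part of this diff) would be:

    ./llama-bench -m ./models/model.gguf --progress 2> progress.log

Since the progress lines are written to stderr (see the fprintf calls below), redirecting stderr keeps them separate from the benchmark results on stdout.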
@@ -1523,7 +1528,13 @@ int main(int argc, char ** argv) {
     llama_model * lmodel = nullptr;
     const cmd_params_instance * prev_inst = nullptr;
 
+    int params_idx = 0;
+    auto params_count = params_instances.size();
     for (const auto & inst : params_instances) {
+        params_idx++;
+        if (params.progress) {
+            fprintf(stderr, "llama-bench: benchmark %d/%ld: starting\n", params_idx, params_count);
+        }
         // keep the same model between tests when possible
         if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
             if (lmodel) {
@@ -1573,10 +1584,16 @@ int main(int argc, char ** argv) {
 
         // warmup run
         if (t.n_prompt > 0) {
+            if (params.progress) {
+                fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count);
+            }
             // test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
             test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
         }
         if (t.n_gen > 0) {
+            if (params.progress) {
+                fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count);
+            }
             test_gen(ctx, 1, 0, t.n_threads);
         }
@@ -1586,9 +1603,15 @@ int main(int argc, char ** argv) {
             uint64_t t_start = get_time_ns();
 
             if (t.n_prompt > 0) {
+                if (params.progress) {
+                    fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps);
+                }
                 test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
             }
             if (t.n_gen > 0) {
+                if (params.progress) {
+                    fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps);
+                }
                 test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
             }
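Taken together, the fprintf calls above trace every phase of a run on stderr. As a sketch of what to expect, assuming two parameter instances and the default of five repetitions, a run with --progress would emit output roughly like:

    llama-bench: benchmark 1/2: starting
    llama-bench: benchmark 1/2: warmup prompt run
    llama-bench: benchmark 1/2: warmup generation run
    llama-bench: benchmark 1/2: prompt run 1/5
    ...
    llama-bench: benchmark 2/2: generation run 5/5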