@@ -1516,10 +1516,10 @@ int main(int argc, char ** argv) {
1516
1516
llama_model * lmodel = nullptr ;
1517
1517
const cmd_params_instance * prev_inst = nullptr ;
1518
1518
1519
- int params_idx = 1 ;
1519
+ int params_idx = 0 ;
1520
1520
for (const auto & inst : params_instances) {
1521
- LOG_TEE (" llama-bench: starting benchmark %d/%ld\n " , params_idx, params_instances.size ());
1522
1521
params_idx ++;
1522
+ LOG_TEE (" llama-bench: benchmark %d/%ld: starting\n " , params_idx, params_instances.size ());
1523
1523
// keep the same model between tests when possible
1524
1524
if (!lmodel || !prev_inst || !inst.equal_mparams (*prev_inst)) {
1525
1525
if (lmodel) {
@@ -1569,10 +1569,12 @@ int main(int argc, char ** argv) {
1569
1569
1570
1570
// warmup run
1571
1571
if (t.n_prompt > 0 ) {
1572
+ LOG_TEE (" llama-bench: benchmark %d/%ld: warmup prompt run\n " , params_idx, params_instances.size ());
1572
1573
// test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
1573
1574
test_prompt (ctx, t.n_prompt , 0 , t.n_batch , t.n_threads );
1574
1575
}
1575
1576
if (t.n_gen > 0 ) {
1577
+ LOG_TEE (" llama-bench: benchmark %d/%ld: warmup generation run\n " , params_idx, params_instances.size ());
1576
1578
test_gen (ctx, 1 , 0 , t.n_threads );
1577
1579
}
1578
1580
@@ -1582,9 +1584,11 @@ int main(int argc, char ** argv) {
1582
1584
uint64_t t_start = get_time_ns ();
1583
1585
1584
1586
if (t.n_prompt > 0 ) {
1587
+ LOG_TEE (" llama-bench: benchmark %d/%ld: prompt run %d/%d\n " , params_idx, params_instances.size (), i + 1 , params.reps );
1585
1588
test_prompt (ctx, t.n_prompt , 0 , t.n_batch , t.n_threads );
1586
1589
}
1587
1590
if (t.n_gen > 0 ) {
1591
+ LOG_TEE (" llama-bench: benchmark %d/%ld: generation run %d/%d\n " , params_idx, params_instances.size (), i + 1 , params.reps );
1588
1592
test_gen (ctx, t.n_gen , t.n_prompt , t.n_threads );
1589
1593
}
1590
1594
0 commit comments