@@ -73,7 +73,7 @@ int main(int argc, char ** argv){
73
73
int n_drafted = 0 ;
74
74
int n_accept = 0 ;
75
75
76
- const int64_t t_start_ms = ggml_time_ms ();
76
+ const long t_start_ms = ggml_time_ms ();
77
77
78
78
// Iterate over input tokens in chunks of size n_ctx.
79
79
// Each chunk is treated as if a sequential generation but with pre-determined tokens to ensure reproducibility.
@@ -127,12 +127,19 @@ int main(int argc, char ** argv){
127
127
128
128
}
129
129
if (i_start > 0 && i_start / 100000 != (i_start - n_ctx) / 100000 ) {
130
- const int64_t t_now_ms = ggml_time_ms ();
131
- const int64_t eta_ms = (n_input - i_start) * (t_now_ms - t_start_ms) / i_start;
132
- const int64_t eta_min = eta_ms / (60 *1000 );
133
- const int64_t eta_s = (eta_ms - 60 *1000 *eta_min) / 1000 ;
134
-
135
- LOG_TEE (" %d/%d done, ETA: %02ld:%02ld\n " , i_start, n_input, eta_min, eta_s);
130
+ const long t_now_ms = ggml_time_ms ();
131
+ const long eta_ms = (n_input - i_start) * (t_now_ms - t_start_ms) / i_start;
132
+ const long eta_min = eta_ms / (60 *1000 );
133
+ const long eta_s = (eta_ms - 60 *1000 *eta_min) / 1000 ;
134
+
135
+ // %02ld doesn't compile on Arm64 MacOS:
136
+ std::string eta_string;
137
+ eta_string += eta_min < 10 ? " 0" : " " ;
138
+ eta_string += std::to_string (eta_min);
139
+ eta_string += " :" ;
140
+ eta_string += eta_s < 10 ? " 0" : " " ;
141
+ eta_string += std::to_string (eta_s);
142
+ LOG_TEE (" lookup-stats: %d/%d done, ETA: %s\n " , i_start, n_input, eta_string.c_str ());
136
143
}
137
144
138
145
// After each chunk, update the dynamic ngram cache with the context ngram cache:
0 commit comments