 void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
                               std::vector<llama_token> & inp, int nnew, bool print_progress) {
     const int64_t t_start_ms = ggml_time_ms();
-    const int inp_size = inp.size();
+    const int64_t inp_size = inp.size();
 
-    const int n_todo = inp_size * (ngram_max - ngram_min + 1);
-    int n_done = 0;
+    const int64_t n_todo = inp_size * (ngram_max - ngram_min + 1);
+    int64_t n_done = 0;
 
-    for (int ngram_size = ngram_min; ngram_size <= ngram_max; ++ngram_size) {
-        const int i_start = std::max(inp_size - nnew, ngram_size);
-        for (int i = i_start; i < inp_size; ++i) {
-            const int ngram_start = i - ngram_size;
+    for (int64_t ngram_size = ngram_min; ngram_size <= ngram_max; ++ngram_size) {
+        const int64_t i_start = std::max(inp_size - nnew, ngram_size);
+        for (int64_t i = i_start; i < inp_size; ++i) {
+            const int64_t ngram_start = i - ngram_size;
             llama_ngram ngram(&inp[ngram_start], ngram_size);
             const llama_token token = inp[i];
 
@@ -39,7 +39,7 @@ void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, in
                 const int64_t eta_min = eta_ms / (60*1000);
                 const int64_t eta_s   = (eta_ms - 60*1000*eta_min) / 1000;
 
-                fprintf(stderr, "%s: %d/%d done, ETA: %02ld:%02ld\n", __func__, n_done, n_todo, eta_min, eta_s);
+                fprintf(stderr, "%s: %ld/%ld done, ETA: %02ld:%02ld\n", __func__, n_done, n_todo, eta_min, eta_s);
             }
         }
     }
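The diff widens the progress bookkeeping in `llama_ngram_cache_update` from `int` to `int64_t`: for a large training corpus, the product `inp_size * (ngram_max - ngram_min + 1)` can exceed `INT_MAX`, so 32-bit counters could wrap around. Below is a minimal standalone sketch of the overflow this avoids; the corpus size and ngram range are hypothetical values chosen only for illustration, not taken from the PR.

```cpp
// Minimal sketch (assumed values, not from the PR): with a 600M-token corpus and
// ngram sizes 1..4, n_todo is ~2.4 billion and no longer fits in a 32-bit int.
#include <cinttypes>
#include <climits>
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t inp_size  = 600000000LL; // hypothetical corpus size in tokens
    const int     ngram_min = 1;           // hypothetical ngram range
    const int     ngram_max = 4;

    // Computed in 64-bit arithmetic, as in the patched code.
    const int64_t n_todo = inp_size * (ngram_max - ngram_min + 1);

    printf("n_todo  = %" PRId64 "\n", n_todo);  // 2400000000
    printf("INT_MAX = %d\n", INT_MAX);          // typically 2147483647
    printf("fits in 32-bit int: %s\n", n_todo <= INT_MAX ? "yes" : "no"); // prints "no"
    return 0;
}
```

The `fprintf` format change follows from the wider types: `n_done` and `n_todo` are now `int64_t`, so the old `%d` specifiers would be a format mismatch, hence the switch to `%ld`.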