@@ -248,7 +248,6 @@ struct llama_vocab {
 
     llama_trie special_token_trie;
     std::unordered_map<token, id> special_token_to_id;
-    std::vector<id> special_tokens;
     size_t max_special_token_length;
 };
 
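For reference, here is a minimal sketch of the llama_trie interface this diff relies on. Only add() and split() are reconstructed from the call sites below; the node layout and the longest-match policy are assumptions, not the implementation from this commit. split() is taken to return segment end offsets such that every occurrence of a registered special token lands in a segment of its own:

// Hedged sketch of llama_trie: the add()/split() interface is inferred from
// the call sites in this diff; the internals below are an assumption, not
// the committed implementation.
#include <map>
#include <memory>
#include <string>
#include <vector>

struct llama_trie {
    struct node {
        std::map<char, std::unique_ptr<node>> next;
        bool terminal = false; // true if a special token ends at this node
    };
    node root;

    void add(const std::string & word) {
        node * cur = &root;
        for (char c : word) {
            auto & child = cur->next[c];
            if (!child) {
                child = std::make_unique<node>();
            }
            cur = child.get();
        }
        cur->terminal = true;
    }

    // Returns segment end offsets; each longest special-token match becomes
    // its own segment, with surrounding plain text in segments of its own.
    std::vector<int> split(const std::string & text) const {
        std::vector<int> offsets;
        const int n = (int) text.size();
        int i = 0;
        while (i < n) {
            // find the longest special token starting at position i, if any
            const node * cur = &root;
            int best_end = -1;
            for (int j = i; j < n; j++) {
                auto it = cur->next.find(text[j]);
                if (it == cur->next.end()) {
                    break;
                }
                cur = it->second.get();
                if (cur->terminal) {
                    best_end = j + 1;
                }
            }
            if (best_end != -1) {
                offsets.push_back(i);        // close the plain-text segment before the match
                offsets.push_back(best_end); // the special token itself
                i = best_end;
            } else {
                i++;
            }
        }
        offsets.push_back(n); // trailing plain text, if any
        return offsets;
    }
};

Emitting end offsets rather than substrings keeps the consumer cheap: the zero-length segments produced at text boundaries are skipped by the start >= end check in llama_tokenize below.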
@@ -539,14 +538,13 @@ struct llama_file_loader {
 
         for (uint32_t i = 0; i < hparams.n_vocab_sp; i++) {
             uint32_t token_id = file.read_u32();
-            const auto & token = vocab.id_to_token[token_id].tok;
+            const auto & word = vocab.id_to_token[token_id].tok;
 
-            vocab.special_token_trie.add(token);
-            vocab.special_tokens.push_back(token_id);
-            vocab.special_token_to_id[token] = token_id;
+            vocab.special_token_trie.add(word);
+            vocab.special_token_to_id[word] = token_id;
 
-            if (vocab.max_special_token_length < token.size()) {
-                vocab.max_special_token_length = token.size();
+            if (vocab.max_special_token_length < word.size()) {
+                vocab.max_special_token_length = word.size();
             }
         }
     }
@@ -641,9 +639,8 @@ struct llama_file_saver {
             file.write_raw(token_score.tok.data(), token_score.tok.size());
             file.write_raw(&token_score.score, sizeof(token_score.score));
         }
-        uint32_t n_vocab_sp = any_file_loader->hparams.n_vocab_sp;
-        for (uint32_t i = 0; i < n_vocab; i++) {
-            file.write_u32(any_file_loader->vocab.special_tokens[i]);
+        for (const auto & pair : any_file_loader->vocab.special_token_to_id) {
+            file.write_u32(pair.second);
         }
     }
     void write_tensor(llama_load_tensor & tensor, enum ggml_type new_type, const void * new_data, size_t new_size) {
@@ -1964,24 +1961,23 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
         return output;
     }
 
-    auto offsets = vocab.special_token_trie.split(text);
+    std::vector<int> offsets = vocab.special_token_trie.split(text);
     int start = 0;
     for (int end : offsets) {
         if (start >= end) {
             continue;
         }
 
-        size_t part_length = end - start;
-        //printf("\"%.*s\"\n", (int) part_length, text.c_str() + start);
-
-        if (vocab.max_special_token_length < part_length) {
-            tokenizer.tokenize(text.c_str() + start, part_length, output);
-        } else {
-            auto token_it = vocab.special_token_to_id.find(std::string(text.c_str() + start, part_length));
+        const char * part = text.c_str() + start;
+        size_t part_len = end - start;
+        if (vocab.max_special_token_length < part_len) {
+            tokenizer.tokenize(part, part_len, output);
+        } else {
+            auto token_it = vocab.special_token_to_id.find(std::string(part, part_len));
             if (token_it != vocab.special_token_to_id.end()) {
                 output.push_back(token_it->second);
             } else {
-                tokenizer.tokenize(text.c_str() + start, part_length, output);
+                tokenizer.tokenize(part, part_len, output);
             }
         }
         start = end;
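A hedged usage sketch of the loop above, built on the assumed trie from the earlier sketch; with "<s>" registered, "hello<s>world" splits into "hello", "<s>", "world", which mirrors the debug printf removed from the old code:

// Hedged usage sketch, assuming the llama_trie sketch above.
#include <cstdio>
#include <string>

int main() {
    llama_trie trie;
    trie.add("<s>");

    std::string text = "hello<s>world";
    int start = 0;
    for (int end : trie.split(text)) {
        if (start >= end) {
            continue; // zero-length segment, skipped as in llama_tokenize
        }
        // prints "hello", "<s>", "world" on separate lines
        printf("\"%.*s\"\n", end - start, text.c_str() + start);
        start = end;
    }
    return 0;
}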
@@ -3515,10 +3511,6 @@ llama_token llama_token_nl() {
     return 13;
 }
 
-bool llama_is_special_token(const struct llama_context *ctx, llama_token token) {
-    return std::find(ctx->vocab.special_tokens.begin(), ctx->vocab.special_tokens.end(), token) != ctx->vocab.special_tokens.end();
-}
-
 
 void llama_print_timings(struct llama_context * ctx) {
     const int64_t t_end_us = ggml_time_us();