@@ -975,14 +975,15 @@ static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
 
-static std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
     std::vector<char> result(8, 0);
     const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
         GGML_ASSERT(check == -n_tokens);
-    } else {
+    }
+    else {
         result.resize(n_tokens);
     }
 
@@ -1202,10 +1203,10 @@ struct llama_vocab {
     id special_eot_id = 32010;
 
     int find_bpe_rank(std::string token_left, std::string token_right) const {
-        replace_all(token_left,  " ",  "\u0120");
-        replace_all(token_left,  "\n", "\u010A");
-        replace_all(token_right, " ",  "\u0120");
-        replace_all(token_right, "\n", "\u010A");
+        GGML_ASSERT(token_left.find(" ") == std::string::npos);
+        GGML_ASSERT(token_left.find("\n") == std::string::npos);
+        GGML_ASSERT(token_right.find(" ") == std::string::npos);
+        GGML_ASSERT(token_right.find("\n") == std::string::npos);
 
         auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
         if (it == bpe_ranks.end()) {
@@ -7499,7 +7500,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
 
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id = candidates->data[i].id;
-        const std::string piece = llama_token_to_str(ctx, id);
+        const std::string piece = llama_token_to_piece(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;
@@ -7711,7 +7712,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }
 
-    const std::string piece = llama_token_to_str(ctx, token);
+    const std::string piece = llama_token_to_piece(ctx, token);
 
     // Note terminating 0 in decoded string
     const auto decoded = decode_utf8(piece.c_str(), grammar->partial_utf8);
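// Usage sketch (not part of the diff above): a minimal, hypothetical helper
// showing how the renamed static wrapper llama_token_to_piece(ctx, token)
// might be used to detokenize a whole sequence. It assumes a valid
// llama_context * ctx and tokens produced by llama_tokenize; the name
// tokens_to_text is illustrative and does not appear in the llama.cpp source.
static std::string tokens_to_text(const struct llama_context * ctx,
                                  const std::vector<llama_token> & tokens) {
    std::string text;
    for (const llama_token tok : tokens) {
        // the wrapper grows its internal buffer when a piece exceeds 8 bytes
        text += llama_token_to_piece(ctx, tok);
    }
    return text;
}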