@@ -6420,10 +6420,13 @@ struct llama_grammar_candidate {
64206420// pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
64216421static std::pair<std::vector<uint32_t >, llama_partial_utf8> decode_utf8 (
64226422 const char * src,
6423+ size_t n_src,
64236424 llama_partial_utf8 partial_start) {
64246425 static const int lookup[] = { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 2 , 2 , 3 , 4 };
64256426 const char * pos = src;
64266427 std::vector<uint32_t > code_points;
6428+ // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
6429+ code_points.reserve (n_src + 1 );
64276430 uint32_t value = partial_start.value ;
64286431 int n_remain = partial_start.n_remain ;
64296432
@@ -6474,6 +6477,13 @@ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
64746477 return std::make_pair (std::move (code_points), llama_partial_utf8{ value, n_remain });
64756478}
64766479
6480+ static std::pair<std::vector<uint32_t >, llama_partial_utf8> decode_utf8 (
6481+ std::string src,
6482+ llama_partial_utf8 partial_start
6483+ ) {
6484+ return decode_utf8 (src.c_str (), src.size (), partial_start);
6485+ }
6486+
64776487// returns true iff pos points to the end of one of the definitions of a rule
64786488static bool llama_grammar_is_end_of_sequence (const llama_grammar_element * pos) {
64796489 switch (pos->type ) {
@@ -7123,7 +7133,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
71237133 } else if (piece.empty () || piece[0 ] == 0 ) {
71247134 candidates->data [i].logit = -INFINITY;
71257135 } else {
7126- candidates_decoded.push_back (decode_utf8 (piece. c_str () , grammar->partial_utf8 ));
7136+ candidates_decoded.push_back (decode_utf8 (piece, grammar->partial_utf8 ));
71277137 candidates_grammar.push_back ({ i, candidates_decoded.back ().first .data (), candidates_decoded.back ().second });
71287138 }
71297139 }
@@ -7330,7 +7340,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
73307340 const std::string piece = llama_token_to_piece (ctx, token);
73317341
73327342 // Note terminating 0 in decoded string
7333- const auto decoded = decode_utf8 (piece. c_str () , grammar->partial_utf8 );
7343+ const auto decoded = decode_utf8 (piece, grammar->partial_utf8 );
73347344 const auto & code_points = decoded.first ;
73357345 for (auto it = code_points.begin (), end = code_points.end () - 1 ; it != end; ++it) {
73367346 grammar->stacks = llama_grammar_accept (grammar->rules , grammar->stacks , *it);
0 commit comments