Skip to content

Commit 56fba2e

Browse files
committed
fix(chat): fix streaming parser for granite models
1 parent 2749662 commit 56fba2e

File tree

1 file changed

+23
-13
lines changed

1 file changed

+23
-13
lines changed

common/chat.cpp

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2025,15 +2025,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
20252025

20262026
static void common_chat_parse_granite(common_chat_msg_parser & builder) {
20272027
// Parse thinking tags
2028+
static const common_regex start_think_regex(regex_escape("<think>"));
2029+
static const common_regex end_think_regex(regex_escape("</think>"));
2030+
// Granite models output partial tokens such as "<" and "<think".
2031+
// By leveraging try_consume_regex()/try_find_regex() throwing
2032+
// common_chat_msg_partial_exception for these partial tokens,
2033+
// processing is interrupted and the tokens are not passed to add_content().
2034+
if (auto res = builder.try_consume_regex(start_think_regex)) {
2035+
// Rewind to the start of the matched "<think>" tag before searching for "</think>"
2036+
builder.move_to(res->groups[0].begin);
2037+
builder.try_find_regex(end_think_regex, std::string::npos, false);
2038+
// Restore position for try_parse_reasoning()
2039+
builder.move_to(res->groups[0].begin);
2040+
}
20282041
builder.try_parse_reasoning("<think>", "</think>");
20292042

2030-
// Parse response tags using regex
2031-
static const common_regex response_regex("<response>([\\s\\S]*?)</response>");
2032-
if (auto res = builder.try_find_regex(response_regex)) {
2033-
// Extract the content between the tags (capture group 1)
2034-
auto content = builder.str(res->groups[1]);
2035-
builder.add_content(content);
2036-
builder.move_to(res->groups[0].end);
2043+
// Parse response tags
2044+
static const common_regex start_response_regex(regex_escape("<response>"));
2045+
static const common_regex end_response_regex(regex_escape("</response>"));
2046+
// Granite models output partial tokens such as "<" and "<response".
2047+
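// Same hack as for the reasoning tags above: rely on the regex helpers throwing on a partial tag so it is not passed to add_content().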
2048+
if (builder.try_consume_regex(start_response_regex)) {
2049+
builder.try_find_regex(end_response_regex);
20372050
}
20382051

20392052
if (!builder.syntax().parse_tool_calls) {
@@ -2047,13 +2060,10 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
20472060
builder.move_to(res->groups[0].end);
20482061

20492062
// Expect JSON array of tool calls
2050-
auto tool_calls_data = builder.consume_json();
2051-
if (tool_calls_data.json.is_array()) {
2052-
if (!builder.add_tool_calls(tool_calls_data.json)) {
2053-
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
2063+
if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
2064+
if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
2065+
throw common_chat_msg_partial_exception("incomplete tool call");
20542066
}
2055-
} else {
2056-
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
20572067
}
20582068
} else {
20592069
builder.add_content(builder.consume_rest());

0 commit comments

Comments
 (0)