@@ -165,6 +165,19 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
165165 throw std::runtime_error (" Invalid tool_choice: " + tool_choice);
166166}
167167
// Probes whether toggling `enable_thinking` changes what the given chat templates produce.
//
// Builds a throwaway input holding a single message, applies the templates once with
// enable_thinking = false and once with enable_thinking = true, and compares the two prompts.
//
// @param chat_templates  templates handle forwarded unchanged to common_chat_templates_apply
// @return true when the two resulting prompts differ, false when they are identical
168+ bool common_chat_templates_support_enable_thinking (const common_chat_templates * chat_templates) {
169+ common_chat_templates_inputs dummy_inputs;
170+ common_chat_msg msg;
171+ msg.role = " user" ;
172+ msg.content = " test" ;
173+ dummy_inputs.messages = {msg};
// First pass: thinking disabled.
174+ dummy_inputs.enable_thinking = false ;
175+ const auto rendered_no_thinking = common_chat_templates_apply (chat_templates, dummy_inputs);
// Second pass: same inputs, only the enable_thinking flag is flipped.
176+ dummy_inputs.enable_thinking = true ;
177+ const auto rendered_with_thinking = common_chat_templates_apply (chat_templates, dummy_inputs);
// Any difference between the two prompts is taken as evidence that the flag is honoured.
178+ return rendered_no_thinking.prompt != rendered_with_thinking.prompt ;
179+ }
180+
168181template <>
169182std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat (const json & messages) {
170183 std::vector<common_chat_msg> msgs;
@@ -619,6 +632,7 @@ const char * common_chat_format_name(common_chat_format format) {
619632 case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return " FireFunction v2" ;
620633 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return " Functionary v3.2" ;
621634 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return " Functionary v3.1 Llama 3.1" ;
635+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return " DeepSeek V3.1" ;
622636 case COMMON_CHAT_FORMAT_HERMES_2_PRO: return " Hermes 2 Pro" ;
623637 case COMMON_CHAT_FORMAT_COMMAND_R7B: return " Command R7B" ;
624638 case COMMON_CHAT_FORMAT_GRANITE: return " Granite" ;
@@ -687,11 +701,13 @@ static void parse_json_tool_calls(
687701 size_t from = std::string::npos;
688702 auto first = true ;
689703 while (true ) {
704+ auto start_pos = builder.pos ();
690705 auto res = function_regex_start_only && first
691706 ? builder.try_consume_regex (*function_regex_start_only)
692707 : function_regex
693708 ? builder.try_find_regex (*function_regex, from)
694709 : std::nullopt ;
710+
695711 if (res) {
696712 std::string name;
697713 if (get_function_name) {
@@ -726,6 +742,8 @@ static void parse_json_tool_calls(
726742 return ;
727743 }
728744 throw common_chat_msg_partial_exception (" incomplete tool call" );
745+ } else {
746+ builder.move_to (start_pos);
729747 }
730748 break ;
731749 }
@@ -1316,6 +1334,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
13161334 }
13171335 return data;
13181336}
1337+
// Builds the common_chat_params for the DeepSeek V3.1 chat format.
//
// Steps, in order:
//   1. Calls apply() on the template with an extra context object whose `thinking` key mirrors
//      inputs.enable_thinking, and stores the result as the prompt.
//   2. Tags the result with COMMON_CHAT_FORMAT_DEEPSEEK_V3_1.
//   3. If the prompt ends with the opening think tag: closes it immediately when thinking is
//      disabled, otherwise records that thinking is forced open.
//   4. If a non-empty tools array was supplied: builds a tool-call grammar, registers a trigger
//      pattern for it and lists the tokens to preserve.
//
// @param tmpl    chat template passed through to apply()
// @param inputs  request parameters (messages, tools, tool_choice, json_schema, enable_thinking, ...)
// @return the populated common_chat_params
1338+ static common_chat_params common_chat_params_init_deepseek_v3_1 (const common_chat_template & tmpl, const struct templates_params & inputs) {
1339+ common_chat_params data;
1340+
1341+ // Pass thinking context for DeepSeek V3.1 template
1342+ json additional_context = {
1343+ {" thinking" , inputs.enable_thinking },
1344+ };
1345+
1346+ auto prompt = apply (tmpl, inputs,
1347+ /* messages_override= */ inputs.messages ,
1348+ /* tools_override= */ std::nullopt ,
1349+ additional_context);
1350+ data.prompt = prompt;
1351+ data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
// The prompt ending in the opening think tag means generation would start inside a reasoning block.
1352+ if (string_ends_with (data.prompt , " <think>" )) {
1353+ if (!inputs.enable_thinking ) {
// Thinking disabled: close the block right away in the prompt itself.
1354+ data.prompt += " </think>" ;
1355+ } else {
// Thinking enabled: remember that the reasoning block is already open.
1356+ data.thinking_forced_open = true ;
1357+ }
1358+ }
// Tool-call grammar is only built when tools is a non-empty array.
1359+ if (inputs.tools .is_array () && !inputs.tools .empty ()) {
// Lazy grammar only when tool use is not REQUIRED and no JSON schema was supplied.
1360+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema .is_null ();
1361+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1362+ std::vector<std::string> tool_rules;
// One "<name>-call" rule per tool: optional call-begin tag, tool name + separator tag,
// arguments constrained by the tool's parameter schema, then the call-end tag.
1363+ foreach_function (inputs.tools , [&](const json & tool) {
1364+ const auto & function = tool.at (" function" );
1365+ std::string name = function.at (" name" );
// Taken by value because resolve_refs() is called on it below.
1366+ auto parameters = function.at (" parameters" );
1367+ builder.resolve_refs (parameters);
1368+ tool_rules.push_back (builder.add_rule (name + " -call" ,
1369+ " ( \" <|tool▁call▁begin|>\" )? \" " + name + " <|tool▁sep|>"
1370+ " \" " + builder.add_schema (name + " -args" , parameters) + " "
1371+ " \" <|tool▁call▁end|>\" " ));
1372+ });
// NOTE(review): the comment below names distilled Qwen models; presumably carried over from
// another handler — confirm it still applies to DeepSeek V3.1.
1373+ // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
1374+ // so we accept common variants (then it's all constrained)
// Root rule: optional closing think tag (only when thinking is forced open), one of the accepted
// calls-begin tag variants, the tool-call alternatives (repeatable when parallel_tool_calls is set),
// and the calls-end tag.
1375+ builder.add_rule (" root" ,
1376+ std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1377+ " ( \" <|tool▁calls▁begin|>\" | \" <|tool_calls_begin|>\" | \" <|tool calls begin|>\" | \" <|tool\\\\ _calls\\\\ _begin|>\" | \" <|tool▁calls|>\" ) "
1378+ " (" + string_join (tool_rules, " | " ) + " )" + (inputs.parallel_tool_calls ? " *" : " " ) + " "
1379+ " \" <|tool▁calls▁end|>\" "
1380+ " space" );
// Trigger pattern registered alongside the grammar; its prefix differs depending on whether
// thinking is forced open (see the original explanation inside the initializer).
1381+ data.grammar_triggers .push_back ({
1382+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1383+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1384+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1385+ std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) +
1386+ " (<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\ _calls\\\\ _begin|>|<|tool▁calls|>)[\\ s\\ S]*"
1387+ });
// Think tags and tool-call tags listed as preserved tokens.
// Note this assignment happens inside the build_grammar callback.
1388+ data.preserved_tokens = {
1389+ " <think>" ,
1390+ " </think>" ,
1391+ " <|tool▁calls▁begin|>" ,
1392+ " <|tool▁call▁begin|>" ,
1393+ " <|tool▁sep|>" ,
1394+ " <|tool▁call▁end|>" ,
1395+ " <|tool▁calls▁end|>" ,
1396+ };
1397+ });
1398+ }
1399+ return data;
1400+ }
1401+
13191402static void common_chat_parse_deepseek_r1 (common_chat_msg_parser & builder) {
13201403 builder.try_parse_reasoning (" <think>" , " </think>" );
13211404 if (!builder.syntax ().parse_tool_calls ) {
@@ -1337,6 +1420,66 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
13371420 tool_calls_end);
13381421}
13391422
// Parses the non-reasoning part of a DeepSeek V3.1 response.
//
// When tool-call parsing is disabled in the builder's syntax, the whole remaining input is added
// as plain content. Otherwise the remaining input is handed to parse_json_tool_calls() together
// with the regexes defined below.
//
// @param builder  message parser positioned at the start of the content to parse
1423+ static void common_chat_parse_deepseek_v3_1_content (common_chat_msg_parser & builder) {
// Optional call-begin tag, then the captured tool name (a run of characters containing
// neither a newline nor '<'), then the separator tag.
// NOTE(review): the tag delimiters appear here as '|'; if these are ASCII pipes rather than the
// full-width bar, they act as regex alternation inside these patterns — confirm against the
// real file.
1424+ static const common_regex function_regex (" (?:<|tool▁call▁begin|>)?([^\\ n<]+)(?:<|tool▁sep|>)" );
1425+
// Optional whitespace followed by the call-end tag.
1426+ static const common_regex close_regex (" (?:[\\ s]*)?<|tool▁call▁end|>" );
// Accepted spellings of the tag that opens the tool-calls block.
1427+ static const common_regex tool_calls_begin (" (?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\ _calls\\\\ _begin|>|<|tool▁calls|>)" );
// Tag that closes the tool-calls block.
1428+ static const common_regex tool_calls_end (" <|tool▁calls▁end|>" );
1429+
// Tool-call parsing disabled: treat everything that is left as regular content.
1430+ if (!builder.syntax ().parse_tool_calls ) {
1431+ LOG (" %s: not parse_tool_calls\n " , __func__);
1432+ builder.add_content (builder.consume_rest ());
1433+ return ;
1434+ }
1435+
1436+ LOG (" %s: parse_tool_calls\n " , __func__);
1437+
// No start-only function regex is supplied; only function_regex is passed.
1438+ parse_json_tool_calls (
1439+ builder,
1440+ /* block_open= */ tool_calls_begin,
1441+ /* function_regex_start_only= */ std::nullopt ,
1442+ function_regex,
1443+ close_regex,
1444+ tool_calls_end);
1445+ }
1446+
// Entry point for parsing a DeepSeek V3.1 response: splits reasoning from content and delegates
// the content part to common_chat_parse_deepseek_v3_1_content().
//
// Decision order:
//   1. Thinking forced open, input not partial, and no closing think tag anywhere ahead:
//      parse everything as content.
//   2. Otherwise, if try_parse_reasoning() succeeds on the think tags: parse the rest as content.
//   3. Otherwise, if the reasoning format is NONE: parse everything as content.
//   4. Otherwise, if thinking is forced open: everything left is added as reasoning content.
//   5. Otherwise: parse everything as content.
//
// @param builder  message parser holding the model output and the active syntax options
1447+ static void common_chat_parse_deepseek_v3_1 (common_chat_msg_parser & builder) {
1448+ // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
1449+ // First try to parse using the standard reasoning parsing method
1450+ LOG (" %s: thinking_forced_open: %s\n " , __func__, std::to_string (builder.syntax ().thinking_forced_open ).c_str ());
1451+
// Look ahead for a closing think tag, then restore the position so the lookup consumes nothing.
1452+ auto start_pos = builder.pos ();
1453+ auto found_end_think = builder.try_find_literal (" </think>" );
1454+ builder.move_to (start_pos);
1455+
// Case 1: forced-open thinking, non-partial input, yet no closing tag was found ahead.
1456+ if (builder.syntax ().thinking_forced_open && !builder.is_partial () && !found_end_think) {
1457+ LOG (" %s: no end_think, not partial, adding content\n " , __func__);
1458+ common_chat_parse_deepseek_v3_1_content (builder);
// Case 2: the reasoning parser succeeded on the think tags.
1459+ } else if (builder.try_parse_reasoning (" <think>" , " </think>" )) {
1460+ // If reasoning was parsed successfully, the remaining content is regular content
1461+ LOG (" %s: parsed reasoning, adding content\n " , __func__);
1462+ // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
1463+ common_chat_parse_deepseek_v3_1_content (builder);
1464+ } else {
// Case 3: reasoning format NONE — everything goes through the content parser.
1465+ if (builder.syntax ().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
1466+ LOG (" %s: reasoning_format none, adding content\n " , __func__);
1467+ common_chat_parse_deepseek_v3_1_content (builder);
1468+ return ;
1469+ }
1470+ // If no reasoning tags found, check if we should treat everything as reasoning
1471+ if (builder.syntax ().thinking_forced_open ) {
1472+ // If thinking is forced open but no tags found, treat everything as reasoning
1473+ LOG (" %s: thinking_forced_open, adding reasoning content\n " , __func__);
1474+ builder.add_reasoning_content (builder.consume_rest ());
1475+ } else {
// Case 5: no reasoning detected and thinking not forced open.
1476+ LOG (" %s: no thinking_forced_open, adding content\n " , __func__);
1477+ // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
1478+ common_chat_parse_deepseek_v3_1_content (builder);
1479+ }
1480+ }
1481+ }
1482+
13401483static common_chat_params common_chat_params_init_gpt_oss (const common_chat_template & tmpl, const struct templates_params & inputs) {
13411484 common_chat_params data;
13421485 auto prompt = apply (tmpl, inputs);
@@ -1833,7 +1976,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
18331976 // If thinking_forced_open, then we capture the </think> tag in the grammar,
18341977 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
18351978 std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1836- " ( \\ s*"
1979+ " \\ s*( "
18371980 " (?:<tool_call>"
18381981 " |<function"
18391982 " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
@@ -2124,6 +2267,12 @@ static common_chat_params common_chat_templates_apply_jinja(
21242267 }
21252268 }
21262269
2270+ // DeepSeek V3.1: detect based on specific patterns in the template
2271+ if (src.find (" message['prefix'] is defined and message['prefix'] and thinking" ) != std::string::npos &&
2272+ params.json_schema .is_null ()) {
2273+ return common_chat_params_init_deepseek_v3_1 (tmpl, params);
2274+ }
2275+
21272276 // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
21282277 if (src.find (" <|tool▁calls▁begin|>" ) != std::string::npos && params.json_schema .is_null ()) {
21292278 return common_chat_params_init_deepseek_r1 (tmpl, params);
@@ -2286,6 +2435,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
22862435 case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
22872436 common_chat_parse_deepseek_r1 (builder);
22882437 break ;
2438+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
2439+ common_chat_parse_deepseek_v3_1 (builder);
2440+ break ;
22892441 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
22902442 common_chat_parse_functionary_v3_2 (builder);
22912443 break ;
0 commit comments