diff --git a/common/chat.cpp b/common/chat.cpp
index 26a1d7cce4e78..f7331703cb813 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -580,10 +580,7 @@ static common_chat_msg parse_json_tool_calls(
     }
     if (!result.tool_calls.empty()) {
-        if (!string_strip(result.content).empty()) {
-            LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
-        }
-        result.content = "";
+        result.content = string_strip(result.content);
     }
     return result;
 }
 
@@ -1359,14 +1356,15 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
             std::string name = function.at("name");
             auto parameters = function.at("parameters");
             builder.resolve_refs(parameters);
-            tool_rules.push_back(builder.add_schema(name + "-call", {
+            auto call_rule = builder.add_schema(name + "-call", {
                 {"type", "object"},
                 {"properties", {
                     {"name", {{"const", name}}},
                     {"arguments", parameters},
                 }},
                 {"required", json::array({"name", "arguments"})},
-            }));
+            });
+            tool_rules.push_back(builder.add_rule(name + "-call", "\"<|tool_call|>\" " + call_rule + " \"<|/tool_call|>\""));
         });
         auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
         std::vector<std::string> alt_tags {
@@ -1379,6 +1377,9 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
         data.preserved_tokens = {
             "<|tool_call|>",
             "<|/tool_call|>",
+            "<|tool_response|>",
+            "<|tool|>",
+            "<|/tool|>",
         };
     });
 
@@ -1437,89 +1438,9 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
 }
 
 static common_chat_msg common_chat_parse_phi_4(const std::string & input) {
-    common_chat_msg result;
-    result.role = "assistant";
-
-    std::string final_content = "";
-
-    const std::string opening_tag = "<|tool_call|>";
-    const std::string closing_tag = "<|/tool_call|>";
-
-    size_t start_pos = 0;
-    while (true) {
-        // Find next tool call
-        size_t tool_start = input.find(opening_tag, start_pos);
-        if (tool_start == std::string::npos) {
-            // No more tool calls.
-
-            // Is start_pos within string bounds?
-            if (start_pos < input.length()) {
-                // Add the rest of the string to final_content
-                final_content += input.substr(start_pos);
-            }
-            break;
-        }
-
-        // Add content before the tool call to final_content
-        final_content += input.substr(start_pos, tool_start - start_pos);
-
-        // Find closing tag
-        size_t content_start = tool_start + opening_tag.length();
-        size_t tool_end = input.find(closing_tag, content_start);
-
-        if (tool_end == std::string::npos) {
-            // No closing tag found, so just include the rest of the string as tool.
-            tool_end = input.length();
-        }
-
-        // Extract tool call content
-        std::string tool_content = input.substr(
-            content_start,
-            tool_end - content_start
-        );
-
-        // Try to parse the tool call
-        try {
-            auto tool_call = json::parse(tool_content);
-
-            // Verify the required fields exist
-            if (!tool_call.contains("name")) {
-                throw std::runtime_error("Missing 'name' field in tool call");
-            }
-
-            if (!tool_call.contains("arguments")) {
-                throw std::runtime_error("Missing 'arguments' field in tool call");
-            }
-
-            std::string name = tool_call["name"].get<std::string>();
-
-            std::string arguments;
-            try {
-                arguments = tool_call["arguments"].dump();
-            } catch (const std::exception & e) {
-                LOG_ERR("Failed to serialize arguments: %s\n", e.what());
-                arguments = "{}";
-            }
-
-            result.tool_calls.push_back({
-                name,
-                arguments,
-                /* id= */ "",
-            });
-        } catch (const std::exception & e) {
-            // If parsing fails, include the entire tool call in the content
-            final_content += input.substr(
-                tool_start,
-                tool_end + closing_tag.length() - tool_start
-            );
-        }
-
-        // Move past this tool call for next iteration
-        start_pos = tool_end + closing_tag.length();
-    }
-
-    result.content = final_content;
-    return result;
+    static std::regex function_regex("<\\|tool_call\\|>\\s*\\{\\s*\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"arguments\"\\s*:");
+    static std::regex close_regex(R"(\}\s*(<\|/tool_call\|>)?)");
+    return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
 }
 
diff --git a/docs/function-calling.md b/docs/function-calling.md
index c3873c3fa63d1..62261077dd508 100644
--- a/docs/function-calling.md
+++ b/docs/function-calling.md
@@ -12,11 +12,12 @@ Function calling is supported for all models (see https://github.com/ggml-org/ll
   - Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2
   - Functionary v3.1 / v3.2
   - Hermes 2/3, Qwen 2.5
-  - Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034)
+  - Qwen 2.5 Coder (#12034)
   - Mistral Nemo
   - Firefunction v2
-  - Command R7B
-  - DeepSeek R1 (WIP / seems reluctant to call any tools?)
+  - Command R7B (#11585)
+  - DeepSeek R1 (#11607)
+  - Phi 4 (#12288)
 
 - Generic tool call is supported when the template isn't recognized by native format handlers (you'll see `Chat format: Generic` in the logs).
   - Use `--chat-template-file` to override the template when appropriate (see examples below)
@@ -297,9 +298,14 @@ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
 llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
     --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
 
+# Native support for Phi 4 also needs a template override (official template is buggy)
+
+llama-server --jinja -fa -hf bartowski/microsoft_Phi-4-mini-instruct-GGUF \
+    --chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
+
 # Native support requires the right template for these GGUFs:
 
-llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
+llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M \
     --chat-template-file models/templates/meetkai-functionary-medium-v3.2.jinja
 
 llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 8386f4eebba48..585bc2065c1eb 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -384,7 +384,8 @@ struct server_task {
                         SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str());
                         common_grammar_trigger trigger;
                         trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
-                        trigger.value = (llama_token) token;
+                        trigger.value = word;
+                        trigger.token = token;
                         params.sampling.grammar_triggers.push_back(trigger);
                     } else {
                         SRV_DBG("Grammar trigger word: `%s`\n", word.c_str());
diff --git a/models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja b/models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
new file mode 100644
index 0000000000000..9f19e57f94802
--- /dev/null
+++ b/models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
@@ -0,0 +1,35 @@
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+{% elif tools is defined -%}
+    {%- set system_message = "You are a helpful assistant with access to tools." -%}
+{% else %}
+    {%- set system_message = "" -%}
+{%- endif %}
+{%- if tools is defined -%}
+    {%- set system_message = system_message + '<|tool|>' + (tools | tojson) + '<|/tool|>' -%}
+    {%- if '<|tool_call|>' not in system_message -%}
+        {%- set system_message = system_message + "\nTo use a tool, respond in this format: <|tool_call|>{\"name\": \"foo\", \"arguments\": {\"a\": 1}}<|/tool_call|>" %}
+    {%- endif %}
+{%- endif %}
+{%- if system_message is defined -%}
+    {{- '<|system|>' + system_message + '<|end|>' -}}
+{%- endif -%}
+{%- for message in messages -%}
+    {%- if message['role'] == 'tool' -%}
+        {{- '<|tool_response|>' + (message['content'] | tojson) + '<|/tool_response|>' -}}
+    {%- elif message['role'] != 'system' -%}
+        {{- '<|' + message['role'] + '|>' -}}
+        {%- if message.content -%}
+            {{- message['content'] -}}
+        {%- endif -%}
+        {%- for tool_call in message.tool_calls -%}
+            {{- '<|tool_call|>' + (tool_call | tojson) + '<|/tool_call|>' -}}
+        {%- endfor -%}
+        {{- '<|end|>' -}}
+    {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{- '<|assistant|>' -}}
+{%- else -%}
+    {{- eos_token -}}
+{%- endif -%}
\ No newline at end of file