diff --git a/common/chat.cpp b/common/chat.cpp
index 62ca26ad7609c..df28190d5eb26 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -448,6 +448,7 @@ std::string common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)";
         case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
         case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
+        case COMMON_CHAT_FORMAT_PHI_4: return "Phi-4";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -583,10 +584,7 @@ static common_chat_msg parse_json_tool_calls(
         }
     }
     if (!result.tool_calls.empty()) {
-        if (!string_strip(result.content).empty()) {
-            LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
-        }
-        result.content = "";
+        result.content = string_strip(result.content);
     }
     return result;
 }
@@ -1356,6 +1354,66 @@ static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::s
     return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
 }
 
+static common_chat_params common_chat_params_init_phi_4(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    // Phi-4 has a unique format that expects tools in the system message with <|tool|> tags
+    // and returns function calls as a JSON object after the <|tool_call|> tag
+    common_chat_params data;
+
+    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+        std::vector<std::string> tool_rules;
+        std::vector<std::string> tool_call_alts;
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string name = function.at("name");
+            auto parameters = function.at("parameters");
+            builder.resolve_refs(parameters);
+            auto call_rule = builder.add_schema(name + "-call", {
+                {"type", "object"},
+                {"properties", {
+                    {"name", {{"const", name}}},
+                    {"arguments", parameters},
+                }},
+                {"required", json::array({"name", "arguments"})},
+            });
+            tool_rules.push_back(builder.add_rule(name + "-call", "\"<|tool_call|>\" " + call_rule + " \"<|/tool_call|>\""));
+        });
+        auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
+        std::vector<std::string> alt_tags {
+            any_tool_call,
+        };
+        tool_call_alts.push_back(any_tool_call);
+        auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
+        builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
+        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_call|>"});
+        data.preserved_tokens = {
+            "<|tool_call|>",
+            "<|/tool_call|>",
+            "<|tool_response|>",
+            "<|tool|>",
+            "<|/tool|>",
+        };
+    });
+
+    // For Phi-4, we need to inject tools into the system message
+    // because the template expects tools in the system message with <|tool|> tags.
+    // The official Phi-4 template has issues with tool calls.
+    // It is advisable to use --chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja instead:
+    // - It expects tools in the system message (instead of in a global variable, as most templates do).
+    // - It does not print tool calls (this is worked around by Minja + the generic mode, but without the <|tool_call|> syntax).
+    // - With defaults, it prints tool call results (messages such as {"role": "tool", "name": "foo", "content": "42"}) as <|tool|>42<|end|>, which conflicts with the tool description wrapping mechanism.
+    // - Tool call results are expected to be injected as a message from the <|tool_response|> role, i.e. <|tool_response|>(json.dump())<|end|>.
+    data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
+    data.format = COMMON_CHAT_FORMAT_PHI_4;
+    return data;
+}
+
+static common_chat_msg common_chat_parse_phi_4(const std::string & input) {
+    static std::regex function_regex("<\\|tool_call\\|>\\s*\\{\\s*\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"arguments\"\\s*:");
+    static std::regex close_regex(R"(\}\s*(<\|/tool_call\|>)?)");
+    return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
+}
+
 static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     // (content)?({"name": "foo", "arguments": {"a": 1}})*
@@ -1642,6 +1700,15 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_firefunction_v2(tmpl, params);
     }
 
+    // Phi-4 mini.
+    if (src.find("<|tool|>") != std::string::npos) {
+        if (src.find("<|tool_response|>") != std::string::npos) {
+            return common_chat_params_init_phi_4(tmpl, params);
+        } else {
+            LOG_WRN("[%s] Invalid legacy Phi 4 template detected: switching to Generic tool call format. To enable native support, please restart with `--chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja`\n", __func__);
+        }
+    }
+
     // Plain handler (no tools)
     if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
         return common_chat_params_init_without_tools(tmpl, params);
@@ -1773,6 +1840,8 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
             return common_chat_parse_command_r7b(input, /* extract_reasoning= */ false);
         case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING:
             return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true);
+        case COMMON_CHAT_FORMAT_PHI_4:
+            return common_chat_parse_phi_4(input);
         default:
             throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
     }
diff --git a/common/chat.h b/common/chat.h
index 9aad84e880448..72215c49eb31d 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -56,6 +56,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
     COMMON_CHAT_FORMAT_COMMAND_R7B,
     COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
+    COMMON_CHAT_FORMAT_PHI_4,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
diff --git a/docs/function-calling.md b/docs/function-calling.md
index c3873c3fa63d1..62261077dd508 100644
--- a/docs/function-calling.md
+++ b/docs/function-calling.md
@@ -12,11 +12,12 @@ Function calling is supported for all models (see https://github.com/ggml-org/llama.cpp/pull/9639):
   - Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2
   - Functionary v3.1 / v3.2
   - Hermes 2/3, Qwen 2.5
-  - Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034)
+  - Qwen 2.5 Coder (#12034)
   - Mistral Nemo
   - Firefunction v2
-  - Command R7B
-  - DeepSeek R1 (WIP / seems reluctant to call any tools?)
+  - Command R7B (#11585)
+  - DeepSeek R1 (#11607)
+  - Phi 4 (#12288)
 
 - Generic tool call is supported when the template isn't recognized by native format handlers (you'll see `Chat format: Generic` in the logs).
 - Use `--chat-template-file` to override the template when appropriate (see examples below)
@@ -297,9 +298,14 @@ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
 llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
   --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
 
+# Native support for Phi 4 also needs a template override (official template is buggy)
+
+llama-server --jinja -fa -hf bartowski/microsoft_Phi-4-mini-instruct-GGUF \
+  --chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
+
 # Native support requires the right template for these GGUFs:
 
-llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
+llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M \
   --chat-template-file models/templates/meetkai-functionary-medium-v3.2.jinja
 
 llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \
diff --git a/examples/server/tests/unit/test_tool_call.py b/examples/server/tests/unit/test_tool_call.py
index 569c2a1f8ea31..206d087ff1e3e 100755
--- a/examples/server/tests/unit/test_tool_call.py
+++ b/examples/server/tests/unit/test_tool_call.py
@@ -138,6 +138,8 @@ def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict,
     ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", PYTHON_TOOL, "code"),
     ("fireworks-ai-llama-3-firefunction-v2", TEST_TOOL, "success"),
     # ("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"),
+    ("microsoft-Phi-4-mini-instruct", TEST_TOOL, "success"),
+    ("microsoft-Phi-4-mini-instruct", PYTHON_TOOL, "code"),
 ])
 def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict, argument_key: str | None):
     global server
@@ -164,6 +166,10 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
     (PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
     (PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
 
+    (TEST_TOOL, "success", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
+    (PYTHON_TOOL, "code", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
+    (PYTHON_TOOL, "code", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"),
+
     (TEST_TOOL, "success", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
     (PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
     (PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"),
@@ -306,6 +312,9 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
     ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
     ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
 
+    ("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
+    ("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"),
+
     ("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
     ("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"),
@@ -385,6 +394,9 @@ def do_test_weather(server: ServerProcess, **kwargs):
 @pytest.mark.slow
 @pytest.mark.parametrize("result_override,n_predict,hf_repo,template_override", [
     (None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
+    # Answers using text, not tools, complaining it wants to measure from the positive Z-axis, not the X-axis.
+    # (None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
+    (None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"),
     (None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None),
     (None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"),
     (None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
@@ -394,6 +406,7 @@ def do_test_weather(server: ServerProcess, **kwargs):
     (None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     (None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
     (None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
+    (None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", None),
     ("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
     # TODO: fix these (wrong results, either didn't respect decimal instruction or got wrong value)
@@ -535,6 +548,9 @@ def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none']
     ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
     ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
 
+    ("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
+    ("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"),
+
     ("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
     ("bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
diff --git a/models/templates/README.md b/models/templates/README.md
index e4fd104fc9fe6..66b56e4679f4a 100644
--- a/models/templates/README.md
+++ b/models/templates/README.md
@@ -19,4 +19,5 @@ These templates can be updated with the following commands:
 ./scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use > models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
 ./scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use > models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
 ./scripts/get_chat_template.py Qwen/Qwen2.5-7B-Instruct > models/templates/Qwen-Qwen2.5-7B-Instruct.jinja
+./scripts/get_chat_template.py microsoft/Phi-4-mini-instruct > models/templates/microsoft-Phi-4-mini-instruct.jinja
 ```
\ No newline at end of file
diff --git a/models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja b/models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
new file mode 100644
index 0000000000000..dd3a199fe78e9
--- /dev/null
+++ b/models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
@@ -0,0 +1,37 @@
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+{% elif tools is defined -%}
+    {%- set system_message = "" -%}
+{%- endif %}
+
+{%- if system_message is defined -%}
+    {{- '<|system|>' + system_message -}}
+    {%- if tools is defined -%}
+        {% for tool in tools %}
+            {{- '<|tool|>' + (tool['function'] | tojson) + '<|/tool|>' -}}
+        {% endfor %}
+        {%- if '<|tool_call|>' not in system_message -%}
+            {{- 'You are a helpful assistant with some tools.\nTo use a tool, respond in this format: <|tool_call|>{"name": "foo", "arguments": {"a": 1}}<|/tool_call|>' -}}
+        {%- endif -%}
+    {%- endif -%}
+    {{- '<|end|>' -}}
+{%- endif -%}
+{%- for message in messages -%}
+    {%- if message['role'] == 'tool' -%}
+        {{- '<|tool_response|>' + (message['content'] | tojson) + '<|end|>' -}}
+    {%- elif message['role'] != 'system' -%}
+        {{- '<|' + message['role'] + '|>' -}}
+        {%- if message.content -%}
+            {{- message['content'] -}}
+        {%- endif -%}
+        {%- for tool_call in message.tool_calls -%}
+            {{- '<|tool_call|>' + (tool_call['function'] | tojson) + '<|/tool_call|>' -}}
+        {%- endfor -%}
+        {{- '<|end|>' -}}
+    {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{- '<|assistant|>' -}}
+{%- else -%}
+    {{- eos_token -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/models/templates/microsoft-Phi-4-mini-instruct.jinja b/models/templates/microsoft-Phi-4-mini-instruct.jinja
new file mode 100644
index 0000000000000..a9c00dd9bbd97
--- /dev/null
+++ b/models/templates/microsoft-Phi-4-mini-instruct.jinja
@@ -0,0 +1 @@
+{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
\ No newline at end of file
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index a1034b1a41b12..e13befdb7c179 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -820,6 +820,36 @@ static void test_template_output_parsers() {
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools, "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
     }
+    {
+        auto tmpls = read_templates("models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja");
+        std::vector<std::string> end_tokens{ "<|end|>" };
+
+        assert_equals(COMMON_CHAT_FORMAT_PHI_4, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+
+        // Test normal message without tools
+        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+
+        // Test with content before tool call
+        assert_msg_equals(
+            common_chat_msg{"assistant", "I'll help with that.", {}, tool_calls, "", "", ""},
+            common_chat_parse(
+                "I'll help with that.<|tool_call|>{\"name\":\"special_function\",\"arguments\":{\"arg1\":1}}<|/tool_call|>",
+                COMMON_CHAT_FORMAT_PHI_4));
+
+        // Test with content after tool call
+        assert_msg_equals(
+            common_chat_msg{"assistant", "I'll help with that.", {}, tool_calls, "", "", ""},
+            common_chat_parse(
+                "<|tool_call|>{\"name\":\"special_function\",\"arguments\":{\"arg1\":1}}<|/tool_call|>I'll help with that.",
+                COMMON_CHAT_FORMAT_PHI_4));
+
+        // Test with newlines.
+        assert_msg_equals(message_assist_call, common_chat_parse(
+            "<|tool_call|>\n"
+            "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+            "<|/tool_call|>",
+            COMMON_CHAT_FORMAT_PHI_4));
+    }
     {
         auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
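
For anyone who wants to sanity-check the parsing side in isolation, here is a minimal standalone sketch of what the two regexes in `common_chat_parse_phi_4` match. Only the regex patterns are taken verbatim from the patch; the `main()` harness and the sample completion string are illustrative assumptions.

```cpp
// Standalone sketch (not part of the patch): exercises the Phi-4 tool-call regexes.
#include <iostream>
#include <regex>
#include <string>

int main() {
    // Same patterns as in common_chat_parse_phi_4 above.
    const std::regex function_regex(
        "<\\|tool_call\\|>\\s*\\{\\s*\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"arguments\"\\s*:");
    const std::regex close_regex(R"(\}\s*(<\|/tool_call\|>)?)");

    // A plausible Phi-4 completion: some content, then one tool call.
    const std::string output =
        "I'll help with that."
        "<|tool_call|>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}<|/tool_call|>";

    std::smatch open;
    if (std::regex_search(output, open, function_regex)) {
        std::cout << "content  : " << open.prefix() << "\n"; // I'll help with that.
        std::cout << "tool name: " << open[1] << "\n";       // special_function
        // The real parser (parse_json_tool_calls) resumes here with an actual JSON
        // parser to consume the arguments object, then expects close_regex
        // ("}" plus an optional <|/tool_call|>).
        std::cout << "tail     : " << open.suffix() << "\n"; //  {"arg1": 1}}<|/tool_call|>
    }
    (void) close_regex; // only used by the real parser; declared here for completeness
}
```

This also shows why the `parse_json_tool_calls` change matters: the content captured before (or after) the match is no longer discarded, which is exactly what the two new `assert_msg_equals` tests above lock in.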
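
To make the override template concrete, here is the rough prompt it renders for a one-tool round trip, hand-derived from the Jinja above. The `get_time` tool, the message contents, and the exact whitespace are assumptions for illustration, not captured minja output.

```cpp
// Hand-derived sketch of llama-cpp-microsoft-Phi-4-mini-instruct.jinja output.
const char * example_prompt =
    // <|system|> plus system content, one <|tool|> block per tool, and, because the
    // system message does not already mention <|tool_call|>, the injected usage hint:
    "<|system|>You are helpful."
    "<|tool|>{\"name\": \"get_time\", \"parameters\": {\"type\": \"object\", \"properties\": {}}}<|/tool|>"
    "You are a helpful assistant with some tools.\n"
    "To use a tool, respond in this format: "
    "<|tool_call|>{\"name\": \"foo\", \"arguments\": {\"a\": 1}}<|/tool_call|>"
    "<|end|>"
    "<|user|>What time is it?<|end|>"
    // Assistant tool calls are re-serialized from tool_call['function']:
    "<|assistant|><|tool_call|>{\"name\": \"get_time\", \"arguments\": {}}<|/tool_call|><|end|>"
    // Tool results come back JSON-encoded under <|tool_response|>:
    "<|tool_response|>\"12:00\"<|end|>"
    // add_generation_prompt:
    "<|assistant|>";
```

The `<|tool_response|>` tag is also what the detection in `common_chat_templates_apply_jinja` keys on: the stock Microsoft template never emits it, so its absence is what distinguishes the legacy template (which falls back to the generic handler with a warning) from this override.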