tool-call: Phi-4 support #12288

Status: Open. Wants to merge 21 commits into master.

Commits (21)
c3aac4e `tool-call`: Phi-4 support (jpohhhh, Mar 9, 2025)
eae5d97 Merge pull request #1 from ochafik/Telosnex_phi4_tools_template (jpohhhh, Mar 14, 2025)
32d32ef Revert some bits (jpohhhh, Mar 15, 2025)
32ab329 Fix tokens (jpohhhh, Mar 15, 2025)
094f607 Tweak tool response (jpohhhh, Mar 15, 2025)
258b912 Merge branch 'master' of https://github.com/ggml-org/llama.cpp into p… (jpohhhh, Mar 16, 2025)
274ef56 Phi-4 tool calls x template won't fixup messages; add comment re: pas… (jpohhhh, Mar 16, 2025)
b15b809 Phi-4 template changes from testing latest version (jpohhhh, Mar 16, 2025)
f74aee0 Merge branch 'master' of https://github.com/ggml-org/llama.cpp into p… (jpohhhh, Mar 16, 2025)
3ca03c7 remove unnecessary nl (jpohhhh, Mar 16, 2025)
8ccefe5 fix tests (they had incorrect tool call end tag) (jpohhhh, Mar 16, 2025)
6d53c24 Merge branch 'master' of https://github.com/ggml-org/llama.cpp into p… (jpohhhh, Mar 18, 2025)
c2343b2 Fix trailing whitespace via editorconfig action (jpohhhh, Mar 18, 2025)
a5d014b Test coverage in test_tool_call.py (jpohhhh, Mar 19, 2025)
5cd800b Merge branch 'master' of https://github.com/ggml-org/llama.cpp into p… (jpohhhh, Mar 19, 2025)
09b795d Fix template expansion (jpohhhh, Mar 19, 2025)
61ff59e Ensure both <|tool|> and <|tool_response|> are in template before dec… (jpohhhh, Mar 19, 2025)
65c2541 test_tool_call.py tests: chatml; every test block w/Phi-3.5 has a Phi… (jpohhhh, Mar 19, 2025)
e450590 Merge branch 'master' of https://github.com/ggml-org/llama.cpp into p… (jpohhhh, Mar 19, 2025)
ff78c90 fix: trailing whitespace (jpohhhh, Mar 19, 2025)
42858f6 Merge branch 'master' of https://github.com/ggml-org/llama.cpp into p… (jpohhhh, Mar 19, 2025)

77 changes: 73 additions & 4 deletions common/chat.cpp
@@ -448,6 +448,7 @@ std::string common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)";
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
case COMMON_CHAT_FORMAT_PHI_4: return "Phi-4";
default:
throw std::runtime_error("Unknown chat format");
}
@@ -583,10 +584,7 @@ static common_chat_msg parse_json_tool_calls(
}

if (!result.tool_calls.empty()) {
-        if (!string_strip(result.content).empty()) {
-            LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
-        }
-        result.content = "";
+        result.content = string_strip(result.content);
}
return result;
}
@@ -1356,6 +1354,66 @@ static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::s
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
}

static common_chat_params common_chat_params_init_phi_4(const common_chat_template & tmpl, const struct templates_params & inputs) {
// Phi-4 has a unique format that expects tools in the system message with <|tool|> tags
// and returns function calls as a JSON object after <|tool_call|> tag
common_chat_params data;

data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
std::vector<std::string> tool_rules;
std::vector<std::string> tool_call_alts;
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
auto call_rule = builder.add_schema(name + "-call", {
{"type", "object"},
{"properties", {
{"name", {{"const", name}}},
{"arguments", parameters},
}},
{"required", json::array({"name", "arguments"})},
});
tool_rules.push_back(builder.add_rule(name + "-call", "\"<|tool_call|>\" " + call_rule + " \"<|/tool_call|>\""));
});
auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
std::vector<std::string> alt_tags {
any_tool_call,
};
tool_call_alts.push_back(any_tool_call);
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_call|>"});
data.preserved_tokens = {
"<|tool_call|>",
"<|/tool_call|>",
"<|tool_response|>",
"<|tool|>",
"<|/tool|>",
};
});

// For Phi-4, we need to inject tools into the system message
// because the template expects tools in the system message with <|tool|> tags
// The Phi-4 template has issues with tool calls.
// It is advisable to use --chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
// - It expects tools from the system message (instead of as a global variable as most templates).
// - It does not print tool calls (this is worked around by the Minja + the generic mode, but without the <|tool_call|> syntax)
// - With defaults, it prints tool call results (messages such as {"role": "tool", "name": "foo", "content": "42"}) as <|tool|>42<|end|> which conflicts with the tool description wrapping mechanism.
// - Tool call results are expected to be injected as a message from the <|tool_response|> role. i.e. <|tool_response|>(json.dump())<|end|>
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
data.format = COMMON_CHAT_FORMAT_PHI_4;
return data;
}

static common_chat_msg common_chat_parse_phi_4(const std::string & input) {
static std::regex function_regex("<\\|tool_call\\|>\\s*\\{\\s*\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"arguments\"\\s*:");
static std::regex close_regex(R"(\}\s*(<\|/tool_call\|>)?)");
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
}

static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
@@ -1642,6 +1700,15 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_firefunction_v2(tmpl, params);
}

// Phi-4 mini.
if (src.find("<|tool|>") != std::string::npos) {
if (src.find("<|tool_response|>") != std::string::npos) {
return common_chat_params_init_phi_4(tmpl, params);
} else {
LOG_WRN("[%s] Invalid legacy Phi 4 template detected: switching to Generic tool call format. To enable native support, please restart with `--chat-template-file models/template/microsoft-Phi-4-mini-instruct.jinja`", __func__);
}
}

// Plain handler (no tools)
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
return common_chat_params_init_without_tools(tmpl, params);
@@ -1773,6 +1840,8 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
return common_chat_parse_command_r7b(input, /* extract_reasoning= */ false);
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING:
return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true);
case COMMON_CHAT_FORMAT_PHI_4:
return common_chat_parse_phi_4(input);
default:
throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
}
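For reference, here is a minimal Python sketch of what the new `common_chat_parse_phi_4` accepts: the same regex prefix as above, plus JSON decoding of the arguments object. The helper name and the use of `json.JSONDecoder.raw_decode` are illustrative assumptions, not part of this PR:

```python
import json
import re

# Python equivalent of the std::regex prefix in common_chat_parse_phi_4 (illustrative only).
FUNCTION_RE = re.compile(r'<\|tool_call\|>\s*\{\s*"name"\s*:\s*"([^"]+)"\s*,\s*"arguments"\s*:')

def parse_phi4_tool_calls(output: str) -> list[tuple[str, dict]]:
    """Sketch: extract (name, arguments) pairs from Phi-4 style output."""
    calls = []
    for m in FUNCTION_RE.finditer(output):
        # The arguments object begins right after the matched prefix;
        # raw_decode reads exactly one JSON value and stops there.
        args, _ = json.JSONDecoder().raw_decode(output[m.end():].lstrip())
        calls.append((m.group(1), args))
    return calls

print(parse_phi4_tool_calls(
    'I\'ll help.<|tool_call|>{"name": "special_function", "arguments": {"arg1": 1}}<|/tool_call|>'))
# -> [('special_function', {'arg1': 1})]
```
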
1 change: 1 addition & 0 deletions common/chat.h
@@ -56,6 +56,7 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
COMMON_CHAT_FORMAT_COMMAND_R7B,
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
COMMON_CHAT_FORMAT_PHI_4,

COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
14 changes: 10 additions & 4 deletions docs/function-calling.md
@@ -12,11 +12,12 @@ Function calling is supported for all models (see https://github.com/ggml-org/ll
- Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2
- Functionary v3.1 / v3.2
- Hermes 2/3, Qwen 2.5
- - Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034)
+ - Qwen 2.5 Coder (#12034)
- Mistral Nemo
- Firefunction v2
- - Command R7B
- - DeepSeek R1 (WIP / seems reluctant to call any tools?)
+ - Command R7B (#11585)
+ - DeepSeek R1 (#11607)
+ - Phi 4 (#12288)

- Generic tool call is supported when the template isn't recognized by native format handlers (you'll see `Chat format: Generic` in the logs).
- Use `--chat-template-file` to override the template when appropriate (see examples below)
@@ -297,9 +298,14 @@ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja

+ # Native support for Phi 4 also needs a template override (official template is buggy)

+ llama-server --jinja -fa -hf bartowski/microsoft_Phi-4-mini-instruct-GGUF \
+     --chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja

# Native support requires the right template for these GGUFs:

- llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
+ llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M \
+     --chat-template-file models/templates/meetkai-functionary-medium-v3.2.jinja

llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \
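Once the server is up with the override above, the tool-call path can be smoke-tested through the OpenAI-compatible `/v1/chat/completions` endpoint. A rough sketch; the `get_weather` tool is a made-up example, and the default port 8080 is assumed:

```python
import requests

resp = requests.post("http://localhost:8080/v1/chat/completions", json={
    "messages": [{"role": "user", "content": "What is the weather in Istanbul?"}],
    "tools": [{
        "type": "function",
        "function": {  # hypothetical tool, for illustration only
            "name": "get_weather",
            "description": "Get the current weather for a location.",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }],
    "tool_choice": "required",
})
# With native Phi-4 support the reply should carry a parsed tool_calls array
# rather than raw <|tool_call|> text in the content.
print(resp.json()["choices"][0]["message"].get("tool_calls"))
```
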
16 changes: 16 additions & 0 deletions examples/server/tests/unit/test_tool_call.py
@@ -138,6 +138,8 @@ def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict,
("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", PYTHON_TOOL, "code"),
("fireworks-ai-llama-3-firefunction-v2", TEST_TOOL, "success"),
# ("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"),
("microsoft-Phi-4-mini-instruct", TEST_TOOL, "success"),
("microsoft-Phi-4-mini-instruct", PYTHON_TOOL, "code"),
])
def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict, argument_key: str | None):
global server
@@ -164,6 +166,10 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),

(TEST_TOOL, "success", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
(PYTHON_TOOL, "code", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
(PYTHON_TOOL, "code", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"),

(TEST_TOOL, "success", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"),
@@ -306,6 +312,9 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),

[Review comment (Collaborator)] Could you throw in a default (generic) test case for hello_world, weather and calc_result? Just in case their default template changes and something goes boom.

("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M",      None),

("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"),

("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"),

@@ -385,6 +394,9 @@ def do_test_weather(server: ServerProcess, **kwargs):
@pytest.mark.slow
@pytest.mark.parametrize("result_override,n_predict,hf_repo,template_override", [
(None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
# Answers using text, not tools, complaining it wants to measure from the positive Z-axis not x-axis.
[Review comment (Collaborator)] Note that this (crucial) test doesn't test tool call emission, but leveraging of tool call results. Failure indicates that the model doesn't understand the syntax used to give it the result.

# (None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
(None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"),
(None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None),
(None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"),
(None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
@@ -394,6 +406,7 @@ def do_test_weather(server: ServerProcess, **kwargs):
(None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
(None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", None),
("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),

# TODO: fix these (wrong results, either didn't respect decimal instruction or got wrong value)
@@ -535,6 +548,9 @@ def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none']
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),

("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)),
("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"),

("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
("bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),

1 change: 1 addition & 0 deletions models/templates/README.md
@@ -19,4 +19,5 @@ These templates can be updated with the following commands:
./scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use > models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
./scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use > models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
./scripts/get_chat_template.py Qwen/Qwen2.5-7B-Instruct > models/templates/Qwen-Qwen2.5-7B-Instruct.jinja
./scripts/get_chat_template.py microsoft/Phi-4-mini-instruct > models/templates/microsoft-Phi-4-mini-instruct.jinja
```
37 changes: 37 additions & 0 deletions models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
@@ -0,0 +1,37 @@
{%- if messages[0]["role"] == "system" %}
{%- set system_message = messages[0]["content"] %}
{% elif tools is defined -%}
{%- set system_message = "" -%}
{%- endif %}

{%- if system_message is defined -%}
{{- '<|system|>' + system_message -}}
{%- if tools is defined -%}
{% for tool in tools %}
{{- '<|tool|>' + (tool['function'] | tojson) + '<|/tool|>' -}}
{% endfor %}
{%- if '<|tool_call|>' not in system_message -%}
{{- 'You are a helpful assistant with some tools.\nTo use a tool, respond in this format: <|tool_call|>{"name": "foo", "arguments": {"a": 1}}<|/tool_call|>' -}}
{%- endif -%}
{%- endif -%}
{{- '<|end|>' -}}
{%- endif -%}
{%- for message in messages -%}
{%- if message['role'] == 'tool' -%}
{{- '<|tool_response|>' + (message['content'] | tojson) + '<|end|>' -}}
[Review comment (Collaborator)] This seems to be the cause of the test_calc_result failure. Good news is, the following (wild hunch / inspired by other tool call styles) makes it work:

{{- '<|tool_response|>' + message['name'] + '<|tag|>' + (message['content'] | tojson) + '<|end|>' -}}

{%- elif message['role'] != 'system' -%}
{{- '<|' + message['role'] + '|>' -}}
{%- if message.content -%}
{{- message['content'] -}}
{%- endif -%}
{%- for tool_call in message.tool_calls -%}
{{- '<|tool_call|>' + (tool_call['function'] | tojson) + '<|/tool_call|>' -}}
{%- endfor -%}
{{- '<|end|>' -}}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{- '<|assistant|>' -}}
{%- else -%}
{{- eos_token -}}
{%- endif -%}
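To see the wire format this template produces, it can be rendered with Python's Jinja2 against a sample conversation. This is a sketch assuming a llama.cpp checkout and `pip install jinja2`; llama.cpp itself uses minja, which may tolerate missing fields differently, so the sample messages carry explicit `tool_calls` lists, and the `add` tool is a made-up example:

```python
import jinja2

env = jinja2.Environment(loader=jinja2.FileSystemLoader("models/templates"))
tmpl = env.get_template("llama-cpp-microsoft-Phi-4-mini-instruct.jinja")

print(tmpl.render(
    messages=[
        {"role": "system", "content": "You are helpful.", "tool_calls": []},
        {"role": "user", "content": "Add 1 and 2.", "tool_calls": []},
        {"role": "assistant", "content": None,
         "tool_calls": [{"function": {"name": "add", "arguments": {"a": 1, "b": 2}}}]},
        {"role": "tool", "name": "add", "content": "3"},
    ],
    tools=[{"type": "function",
            "function": {"name": "add", "parameters": {"type": "object"}}}],
    add_generation_prompt=True,
    eos_token="<|end|>",
))
# Expected shape (roughly, shown wrapped):
#   <|system|>You are helpful.<|tool|>{...add schema...}<|/tool|>You are a helpful
#   assistant with some tools. ...<|end|>
#   <|user|>Add 1 and 2.<|end|>
#   <|assistant|><|tool_call|>{...name/arguments JSON...}<|/tool_call|><|end|>
#   <|tool_response|>"3"<|end|>
#   <|assistant|>
```
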
1 change: 1 addition & 0 deletions models/templates/microsoft-Phi-4-mini-instruct.jinja
@@ -0,0 +1 @@
{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
30 changes: 30 additions & 0 deletions tests/test-chat.cpp
@@ -820,6 +820,36 @@ static void test_template_output_parsers() {
test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
"{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
}
{
auto tmpls = read_templates("models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja");
std::vector<std::string> end_tokens{ "<|end|>" };

assert_equals(COMMON_CHAT_FORMAT_PHI_4, common_chat_templates_apply(tmpls.get(), inputs_tools).format);

// Test normal message without tools
test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);

// Test with content before tool call
assert_msg_equals(
common_chat_msg{"assistant", "I'll help with that.", {}, tool_calls, "", "", ""},
common_chat_parse(
"I'll help with that.<|tool_call|>{\"name\":\"special_function\",\"arguments\":{\"arg1\":1}}<|/tool_call|>",
COMMON_CHAT_FORMAT_PHI_4));

// Test with content after tool call
assert_msg_equals(
common_chat_msg{"assistant", "I'll help with that.", {}, tool_calls, "", "", ""},
common_chat_parse(
"<|tool_call|>{\"name\":\"special_function\",\"arguments\":{\"arg1\":1}}<|/tool_call|>I'll help with that.",
COMMON_CHAT_FORMAT_PHI_4));

// Test with newlines.
assert_msg_equals(message_assist_call, common_chat_parse(
"<|tool_call|>\n"
"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
"<|/tool_call|>",
COMMON_CHAT_FORMAT_PHI_4));
}
{
auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };