-
Notifications
You must be signed in to change notification settings - Fork 12.2k
tool-call
: Phi-4 support
#12288
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
tool-call
: Phi-4 support
#12288
Changes from all commits
c3aac4e
eae5d97
32d32ef
32ab329
094f607
258b912
274ef56
b15b809
f74aee0
3ca03c7
8ccefe5
6d53c24
c2343b2
a5d014b
5cd800b
09b795d
61ff59e
65c2541
e450590
ff78c90
42858f6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -138,6 +138,8 @@ def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict, | |
("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", PYTHON_TOOL, "code"), | ||
("fireworks-ai-llama-3-firefunction-v2", TEST_TOOL, "success"), | ||
# ("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"), | ||
("microsoft-Phi-4-mini-instruct", TEST_TOOL, "success"), | ||
("microsoft-Phi-4-mini-instruct", PYTHON_TOOL, "code"), | ||
]) | ||
def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict, argument_key: str | None): | ||
global server | ||
|
@@ -164,6 +166,10 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict, | |
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), | ||
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"), | ||
|
||
(TEST_TOOL, "success", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)), | ||
(PYTHON_TOOL, "code", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)), | ||
(PYTHON_TOOL, "code", "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"), | ||
|
||
(TEST_TOOL, "success", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None), | ||
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None), | ||
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"), | ||
|
@@ -306,6 +312,9 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t | |
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), | ||
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"), | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you throw in a default (generic) test case for hello_world, weather and calc_result? Just in case their default template changes and something goes boom.
|
||
("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)), | ||
("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"), | ||
|
||
("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None), | ||
("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"), | ||
|
||
|
@@ -385,6 +394,9 @@ def do_test_weather(server: ServerProcess, **kwargs): | |
@pytest.mark.slow | ||
@pytest.mark.parametrize("result_override,n_predict,hf_repo,template_override", [ | ||
(None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"), | ||
# Answers using text, not tools, complaining it wants to measure from the positive Z-axis not x-axis. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that this (crucial) test doesn't test tool call emission, but rather the leveraging of tool call results. Failure indicates that the model doesn't understand the syntax used to give it the result. |
||
# (None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)), | ||
(None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"), | ||
(None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None), | ||
(None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"), | ||
(None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"), | ||
|
@@ -394,6 +406,7 @@ def do_test_weather(server: ServerProcess, **kwargs): | |
(None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), | ||
(None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"), | ||
(None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), | ||
(None, 128, "bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", None), | ||
("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)), | ||
|
||
# TODO: fix these (wrong results, either didn't respect decimal instruction or got wrong value) | ||
|
@@ -535,6 +548,9 @@ def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none'] | |
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), | ||
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"), | ||
|
||
("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", ("llama-cpp-microsoft-Phi-4-mini-instruct", None)), | ||
("bartowski/microsoft_Phi-4-mini-instruct-GGUF:Q4_K_M", "chatml"), | ||
|
||
("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)), | ||
("bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"), | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
{%- if messages[0]["role"] == "system" %} | ||
{%- set system_message = messages[0]["content"] %} | ||
{% elif tools is defined -%} | ||
{%- set system_message = "" -%} | ||
{%- endif %} | ||
|
||
{%- if system_message is defined -%} | ||
{{- '<|system|>' + system_message -}} | ||
{%- if tools is defined -%} | ||
{% for tool in tools %} | ||
{{- '<|tool|>' + (tool['function'] | tojson) + '<|/tool|>' -}} | ||
{% endfor %} | ||
{%- if '<|tool_call|>' not in system_message -%} | ||
{{- 'You are a helpful assistant with some tools.\nTo use a tool, respond in this format: <|tool_call|>{"name": "foo", "arguments": {"a": 1}}<|/tool_call|>' -}} | ||
{%- endif -%} | ||
{%- endif -%} | ||
{{- '<|end|>' -}} | ||
{%- endif -%} | ||
{%- for message in messages -%} | ||
{%- if message['role'] == 'tool' -%} | ||
{{- '<|tool_response|>' + (message['content'] | tojson) + '<|end|>' -}} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems to be the cause of the test_calc_result failure. Good news is, the following (wild hunch / inspired by other tool call styles) makes it work:
|
||
{%- elif message['role'] != 'system' -%} | ||
{{- '<|' + message['role'] + '|>' -}} | ||
{%- if message.content -%} | ||
{{- message['content'] -}} | ||
{%- endif -%} | ||
{%- for tool_call in message.tool_calls -%} | ||
{{- '<|tool_call|>' + (tool_call['function'] | tojson) + '<|/tool_call|>' -}} | ||
{%- endfor -%} | ||
{{- '<|end|>' -}} | ||
{%- endif -%} | ||
{%- endfor -%} | ||
{%- if add_generation_prompt -%} | ||
{{- '<|assistant|>' -}} | ||
{%- else -%} | ||
{{- eos_token -}} | ||
{%- endif -%} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %} |
Uh oh!
There was an error while loading. Please reload this page.