From ccf02352d40d2241af134b336364b9cab6a41cda Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 17:01:44 +0100
Subject: [PATCH 1/7] Do not send the system message, only the user message
 to Ollama FIM

We just blindly took the first message, which could have been a system
message. The prompt is in the user message, so let's pass that.
---
 src/codegate/providers/ollama/completion_handler.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index cfd04acd..ecc3a76a 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -89,7 +89,14 @@ async def execute_completion(
         """Stream response directly from Ollama API."""
         self.base_tool = base_tool
         if is_fim_request:
-            prompt = request["messages"][0].get("content", "")
+            prompt = ""
+            for i in reversed(range(len(request["messages"]))):
+                if request["messages"][i]["role"] == "user":
+                    prompt = request["messages"][i]["content"]  # type: ignore
+                    break
+            if not prompt:
+                raise ValueError("No user message found in FIM request")
+
             response = await self.client.generate(
                 model=request["model"], prompt=prompt, stream=stream, options=request["options"] # type: ignore
             )

From 3b8a3d381aa5114b260685953985c6110fd81166 Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 17:06:52 +0100
Subject: [PATCH 2/7] Pass the suffix parameter if present to the FIM generate
 call

Some models don't use FIM markers in the code but use the top-level
`suffix` attribute instead. If we don't pass it along, FIM won't
complete successfully.
---
 src/codegate/providers/ollama/completion_handler.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index ecc3a76a..443895f2 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -98,7 +98,11 @@ async def execute_completion(
                 raise ValueError("No user message found in FIM request")
 
             response = await self.client.generate(
-                model=request["model"], prompt=prompt, stream=stream, options=request["options"] # type: ignore
+                model=request["model"],
+                prompt=prompt,
+                suffix=request.get("suffix", ""),
+                stream=stream,
+                options=request["options"] # type: ignore
             )
         else:
             response = await self.client.chat(

From 1a306867ff58f8fcf170ffaf11427bb7ed32563b Mon Sep 17 00:00:00 2001
From: Radoslav Dimitrov
Date: Thu, 30 Jan 2025 13:51:55 +0200
Subject: [PATCH 3/7] Run make format

Signed-off-by: Radoslav Dimitrov
---
 src/codegate/providers/ollama/completion_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index 443895f2..78025c50 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -102,7 +102,7 @@ async def execute_completion(
                 prompt=prompt,
                 suffix=request.get("suffix", ""),
                 stream=stream,
-                options=request["options"] # type: ignore
+                options=request["options"],  # type: ignore
             )
         else:
             response = await self.client.chat(

From 1100547207e757ca393a81312b0c9f86a7533ea2 Mon Sep 17 00:00:00 2001
From: Radoslav Dimitrov
Date: Thu, 30 Jan 2025 14:38:57 +0200
Subject: [PATCH 4/7] Fix the unit tests for ollama FIM

Signed-off-by: Radoslav Dimitrov
---
 tests/providers/ollama/test_ollama_completion_handler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/providers/ollama/test_ollama_completion_handler.py b/tests/providers/ollama/test_ollama_completion_handler.py
index cc32e915..6818d442 100644
--- a/tests/providers/ollama/test_ollama_completion_handler.py
+++ b/tests/providers/ollama/test_ollama_completion_handler.py
@@ -40,6 +40,7 @@ async def test_execute_completion_is_fim_request(handler, chat_request):
         prompt="FIM prompt",
         stream=False,
         options=chat_request["options"],
+        suffix="",
     )


From 328c048058beffcf065ad68b1004fa6a0192d17d Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 16:35:33 +0100
Subject: [PATCH 5/7] Pass along the `raw` parameter

The `raw` parameter tells the LLM to never use natural language, but
just reply in the format of the message. We need to pass that to the
generate call, or else we might get garbage back to the client.
---
 src/codegate/providers/ollama/completion_handler.py      | 1 +
 tests/providers/ollama/test_ollama_completion_handler.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index 78025c50..9e602dc9 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -100,6 +100,7 @@ async def execute_completion(
             response = await self.client.generate(
                 model=request["model"],
                 prompt=prompt,
+                raw=request.get("raw", False),
                 suffix=request.get("suffix", ""),
                 stream=stream,
                 options=request["options"],  # type: ignore

diff --git a/tests/providers/ollama/test_ollama_completion_handler.py b/tests/providers/ollama/test_ollama_completion_handler.py
index 6818d442..df0eb149 100644
--- a/tests/providers/ollama/test_ollama_completion_handler.py
+++ b/tests/providers/ollama/test_ollama_completion_handler.py
@@ -41,6 +41,7 @@ async def test_execute_completion_is_fim_request(handler, chat_request):
         stream=False,
         options=chat_request["options"],
         suffix="",
+        raw=False,
     )

From e91201da9f433a81c5504d4521abd8da73489a3c Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 17:10:16 +0100
Subject: [PATCH 6/7] Print the full reply as debug message in integration
 tests

This is just useful for debugging the tests.
---
 tests/integration/integration_tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/integration_tests.py b/tests/integration/integration_tests.py
index ebed95ed..c9930353 100644
--- a/tests/integration/integration_tests.py
+++ b/tests/integration/integration_tests.py
@@ -139,6 +139,7 @@ async def run_test(self, test: dict, test_headers: dict) -> bool:

         try:
             parsed_response = self.parse_response_message(response, streaming=streaming)
+            logger.debug(f"Response message: {parsed_response}")

             # Load appropriate checks for this test
             checks = CheckLoader.load(test)

From 4e2c5f1e7fdc8ca30367e8334b567ea6b5cdc2c4 Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 21:01:28 +0100
Subject: [PATCH 7/7] Adjust the Ollama FIM testcase to match the model we are
 using

It seems the FIM format didn't match the model, so I replaced it with a
dump of a FIM message I received from Continue.
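
For reference, the new prompt follows the Qwen2.5-coder FIM template,
roughly <|fim_prefix|>{prefix}<|fim_suffix|>{suffix}<|fim_middle|>, with
the completion expected after <|fim_middle|>; that is also why the stop
token list changed.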
---
 tests/integration/testcases.yaml | 62 ++++++++++++--------------
 1 file changed, 23 insertions(+), 39 deletions(-)

diff --git a/tests/integration/testcases.yaml b/tests/integration/testcases.yaml
index 9190dc9c..9109088c 100644
--- a/tests/integration/testcases.yaml
+++ b/tests/integration/testcases.yaml
@@ -297,48 +297,32 @@ testcases:
     url: http://127.0.0.1:8989/ollama/api/generate
     data: |
       {
-        "model": "qwen2.5-coder:0.5b",
-        "max_tokens": 4096,
-        "temperature": 0,
         "stream": true,
-        "stop": [
-          "",
-          "",
-          "",
-          "",
-          "",
-          "",
-          "/src/",
-          "#- coding: utf-8",
-          "```",
-          ""
-        ],
+        "model": "qwen2.5-coder:0.5b",
+        "raw": true,
+        "options": {
+          "temperature": 0.01,
+          "num_predict": 4096,
+          "stop": [
+            "<|endoftext|>",
+            "<|fim_prefix|>",
+            "<|fim_middle|>",
+            "<|fim_suffix|>",
+            "<|fim_pad|>",
+            "<|repo_name|>",
+            "<|file_sep|>",
+            "<|im_start|>",
+            "<|im_end|>",
+            "/src/",
+            "#- coding: utf-8",
+            "```"
+          ],
+          "num_ctx": 8096
+        },
         "prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n    <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
       }
     likes: |
      ```python
-      import invokehttp
-      import requests
-
-      key = "mysecret-key"
-
-      def call_api(url, method='get', data=None):
-          headers = {
-              'Authorization': f'Bearer {key}'
-          }
-
-          if method == 'get':
-              response = requests.get(url, headers=headers)
-          elif method == 'post':
-              response = requests.post(url, headers=headers, json=data)
-          else:
-              raise ValueError("Unsupported HTTP method")
-
-          return response
-
-      data = {'key1': 'test1', 'key2': 'test2'}
-      response = call_api('http://localhost:8080', method='post', data=data)
-
-      print(response.status_code)
-      print(response.json())
+      if __name__ == '__main__':
+          invokehttp.run(call_api)
      ```
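
For reviewers: below is a minimal, self-contained sketch (not part of the
patches) of the FIM path this series converges on. The names
pick_fim_prompt, fake_generate, and main are hypothetical, invented for
this illustration; fake_generate merely echoes its keyword arguments
instead of calling the real Ollama client, and the request shape mirrors
the unit-test fixture above.

import asyncio


def pick_fim_prompt(messages: list[dict]) -> str:
    """Select the content of the last non-empty user message, per patch 1's rule."""
    for message in reversed(messages):
        if message["role"] == "user" and message.get("content"):
            return message["content"]
    raise ValueError("No user message found in FIM request")


async def fake_generate(**kwargs) -> dict:
    # Hypothetical stand-in for the Ollama client's generate call;
    # it only echoes its keyword arguments so the example runs offline.
    return kwargs


async def main() -> None:
    request = {
        "model": "qwen2.5-coder:0.5b",
        "messages": [
            {"role": "system", "content": "You are an autocomplete engine."},
            {"role": "user", "content": "<|fim_prefix|>def add(a, b):\n    <|fim_suffix|>\n<|fim_middle|>"},
        ],
        "raw": True,
        "options": {"temperature": 0.01, "num_predict": 4096},
    }
    # Mirror the generate call after patches 1, 2, and 5: last user message
    # as the prompt, with raw/suffix forwarded using safe defaults.
    call = await fake_generate(
        model=request["model"],
        prompt=pick_fim_prompt(request["messages"]),
        raw=request.get("raw", False),
        suffix=request.get("suffix", ""),
        stream=True,
        options=request["options"],
    )
    assert call["prompt"].startswith("<|fim_prefix|>")
    assert call["raw"] is True and call["suffix"] == ""


asyncio.run(main())

Running it offline exercises the same selection and defaulting behaviour
the handler now has: the system message is skipped, and raw and suffix
fall back to False and the empty string when absent.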