From ccf02352d40d2241af134b336364b9cab6a41cda Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 17:01:44 +0100
Subject: [PATCH 1/7] Do not send the system message, only the user message
 to Ollama FIM

We just blindly took the first message, which could have been a system
message. The prompt is in the user message, so let's pass that.
---
 src/codegate/providers/ollama/completion_handler.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index cfd04acd..ecc3a76a 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -89,7 +89,14 @@ async def execute_completion(
         """Stream response directly from Ollama API."""
         self.base_tool = base_tool
         if is_fim_request:
-            prompt = request["messages"][0].get("content", "")
+            prompt = ""
+            for i in reversed(range(len(request["messages"]))):
+                if request["messages"][i]["role"] == "user":
+                    prompt = request["messages"][i]["content"]  # type: ignore
+                    break
+            if not prompt:
+                raise ValueError("No user message found in FIM request")
+
             response = await self.client.generate(
                 model=request["model"], prompt=prompt, stream=stream, options=request["options"] # type: ignore
             )

From 3b8a3d381aa5114b260685953985c6110fd81166 Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 17:06:52 +0100
Subject: [PATCH 2/7] Pass the suffix parameter if present to the FIM generate
 call

Some models don't use FIM markers in the code but use the top-level
`suffix` attribute instead. If we don't pass it along, FIM won't
complete successfully.
---
 src/codegate/providers/ollama/completion_handler.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index ecc3a76a..443895f2 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -98,7 +98,11 @@ async def execute_completion(
                 raise ValueError("No user message found in FIM request")
 
             response = await self.client.generate(
-                model=request["model"], prompt=prompt, stream=stream, options=request["options"] # type: ignore
+                model=request["model"],
+                prompt=prompt,
+                suffix=request.get("suffix", ""),
+                stream=stream,
+                options=request["options"] # type: ignore
             )
         else:
             response = await self.client.chat(

From 1a306867ff58f8fcf170ffaf11427bb7ed32563b Mon Sep 17 00:00:00 2001
From: Radoslav Dimitrov
Date: Thu, 30 Jan 2025 13:51:55 +0200
Subject: [PATCH 3/7] Run make format

Signed-off-by: Radoslav Dimitrov
---
 src/codegate/providers/ollama/completion_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index 443895f2..78025c50 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -102,7 +102,7 @@ async def execute_completion(
                 prompt=prompt,
                 suffix=request.get("suffix", ""),
                 stream=stream,
-                options=request["options"] # type: ignore
+                options=request["options"],  # type: ignore
             )
         else:
             response = await self.client.chat(

From 1100547207e757ca393a81312b0c9f86a7533ea2 Mon Sep 17 00:00:00 2001
From: Radoslav Dimitrov
Date: Thu, 30 Jan 2025 14:38:57 +0200
Subject: [PATCH 4/7] Fix the unit tests for ollama FIM

Signed-off-by: Radoslav Dimitrov
---
 tests/providers/ollama/test_ollama_completion_handler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/providers/ollama/test_ollama_completion_handler.py b/tests/providers/ollama/test_ollama_completion_handler.py
index cc32e915..6818d442 100644
--- a/tests/providers/ollama/test_ollama_completion_handler.py
+++ b/tests/providers/ollama/test_ollama_completion_handler.py
@@ -40,6 +40,7 @@ async def test_execute_completion_is_fim_request(handler, chat_request):
         prompt="FIM prompt",
         stream=False,
         options=chat_request["options"],
+        suffix="",
     )


From 328c048058beffcf065ad68b1004fa6a0192d17d Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 16:35:33 +0100
Subject: [PATCH 5/7] Pass along the `raw` parameter

The `raw` parameter tells the LLM to never use natural language, but
just reply in the format of the message. We need to pass that to the
generate call, or else we might get garbage back to the client.
---
 src/codegate/providers/ollama/completion_handler.py      | 1 +
 tests/providers/ollama/test_ollama_completion_handler.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index 78025c50..9e602dc9 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -100,6 +100,7 @@ async def execute_completion(
             response = await self.client.generate(
                 model=request["model"],
                 prompt=prompt,
+                raw=request.get("raw", False),
                 suffix=request.get("suffix", ""),
                 stream=stream,
                 options=request["options"],  # type: ignore

diff --git a/tests/providers/ollama/test_ollama_completion_handler.py b/tests/providers/ollama/test_ollama_completion_handler.py
index 6818d442..df0eb149 100644
--- a/tests/providers/ollama/test_ollama_completion_handler.py
+++ b/tests/providers/ollama/test_ollama_completion_handler.py
@@ -41,6 +41,7 @@ async def test_execute_completion_is_fim_request(handler, chat_request):
         stream=False,
         options=chat_request["options"],
         suffix="",
+        raw=False,
     )

From e91201da9f433a81c5504d4521abd8da73489a3c Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 17:10:16 +0100
Subject: [PATCH 6/7] Print the full reply as debug message in integration
 tests

This is just useful for debugging the tests.
---
 tests/integration/integration_tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/integration_tests.py b/tests/integration/integration_tests.py
index ebed95ed..c9930353 100644
--- a/tests/integration/integration_tests.py
+++ b/tests/integration/integration_tests.py
@@ -139,6 +139,7 @@ async def run_test(self, test: dict, test_headers: dict) -> bool:

         try:
             parsed_response = self.parse_response_message(response, streaming=streaming)
+            logger.debug(f"Response message: {parsed_response}")

             # Load appropriate checks for this test
             checks = CheckLoader.load(test)

From 4e2c5f1e7fdc8ca30367e8334b567ea6b5cdc2c4 Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Thu, 30 Jan 2025 21:01:28 +0100
Subject: [PATCH 7/7] Adjust the Ollama FIM testcase to match the model we are
 using

It seems the FIM format didn't match the model, so I replaced it with a
dump of a FIM message I received from Continue.
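
For reference, the new prompt follows the Qwen2.5-coder FIM template,
roughly <|fim_prefix|>{prefix}<|fim_suffix|>{suffix}<|fim_middle|>, with
the completion expected after <|fim_middle|>; that is also why the stop
token list changed.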
---
 tests/integration/testcases.yaml | 62 ++++++++++++--------------
 1 file changed, 23 insertions(+), 39 deletions(-)

diff --git a/tests/integration/testcases.yaml b/tests/integration/testcases.yaml
index 9190dc9c..9109088c 100644
--- a/tests/integration/testcases.yaml
+++ b/tests/integration/testcases.yaml
@@ -297,48 +297,32 @@ testcases:
     url: http://127.0.0.1:8989/ollama/api/generate
     data: |
       {
-        "model": "qwen2.5-coder:0.5b",
-        "max_tokens": 4096,
-        "temperature": 0,
         "stream": true,
-        "stop": [
-          "",
-          "",
-          "",
-          "",
-          "",
-          "",
-          "/src/",
-          "#- coding: utf-8",
-          "```",
-          ""
-        ],
+        "model": "qwen2.5-coder:0.5b",
+        "raw": true,
+        "options": {
+          "temperature": 0.01,
+          "num_predict": 4096,
+          "stop": [
+            "<|endoftext|>",
+            "<|fim_prefix|>",
+            "<|fim_middle|>",
+            "<|fim_suffix|>",
+            "<|fim_pad|>",
+            "<|repo_name|>",
+            "<|file_sep|>",
+            "<|im_start|>",
+            "<|im_end|>",
+            "/src/",
+            "#- coding: utf-8",
+            "```"
+          ],
+          "num_ctx": 8096
+        },
         "prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n    <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
       }
     likes: |
      ```python
-      import invokehttp
-      import requests
-
-      key = "mysecret-key"
-
-      def call_api(url, method='get', data=None):
-          headers = {
-              'Authorization': f'Bearer {key}'
-          }
-
-          if method == 'get':
-              response = requests.get(url, headers=headers)
-          elif method == 'post':
-              response = requests.post(url, headers=headers, json=data)
-          else:
-              raise ValueError("Unsupported HTTP method")
-
-          return response
-
-      data = {'key1': 'test1', 'key2': 'test2'}
-      response = call_api('http://localhost:8080', method='post', data=data)
-
-      print(response.status_code)
-      print(response.json())
+      if __name__ == '__main__':
+          invokehttp.run(call_api)
      ```
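
For reviewers: below is a minimal, self-contained sketch (not part of the
patches) of the FIM path this series converges on. The names
pick_fim_prompt, fake_generate, and main are hypothetical, invented for
this illustration; fake_generate merely echoes its keyword arguments
instead of calling the real Ollama client, and the request shape mirrors
the unit-test fixture above.

import asyncio


def pick_fim_prompt(messages: list[dict]) -> str:
    """Select the content of the last non-empty user message, per patch 1's rule."""
    for message in reversed(messages):
        if message["role"] == "user" and message.get("content"):
            return message["content"]
    raise ValueError("No user message found in FIM request")


async def fake_generate(**kwargs) -> dict:
    # Hypothetical stand-in for the Ollama client's generate call;
    # it only echoes its keyword arguments so the example runs offline.
    return kwargs


async def main() -> None:
    request = {
        "model": "qwen2.5-coder:0.5b",
        "messages": [
            {"role": "system", "content": "You are an autocomplete engine."},
            {"role": "user", "content": "<|fim_prefix|>def add(a, b):\n    <|fim_suffix|>\n<|fim_middle|>"},
        ],
        "raw": True,
        "options": {"temperature": 0.01, "num_predict": 4096},
    }
    # Mirror the generate call after patches 1, 2, and 5: last user message
    # as the prompt, with raw/suffix forwarded using safe defaults.
    call = await fake_generate(
        model=request["model"],
        prompt=pick_fim_prompt(request["messages"]),
        raw=request.get("raw", False),
        suffix=request.get("suffix", ""),
        stream=True,
        options=request["options"],
    )
    assert call["prompt"].startswith("<|fim_prefix|>")
    assert call["raw"] is True and call["suffix"] == ""


asyncio.run(main())

Running it offline exercises the same selection and defaulting behaviour
the handler now has: the system message is skipped, and raw and suffix
fall back to False and the empty string when absent.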