
Commit 6cf8f67

Fix ollama arguments
1 parent fa7cc94 commit 6cf8f67

2 files changed: +86 -13 lines changed

src/neo4j_graphrag/llm/ollama_llm.py

Lines changed: 15 additions & 1 deletion
@@ -14,6 +14,7 @@
 # limitations under the License.
 from __future__ import annotations
 
+import warnings
 from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Sequence, Union, cast
 
 from pydantic import ValidationError
@@ -59,6 +60,19 @@ def __init__(
         self.async_client = ollama.AsyncClient(
             **kwargs,
         )
+        if "stream" in self.model_params:
+            raise ValueError("Streaming is not supported by the OllamaLLM wrapper")
+        # bug-fix with backward compatibility:
+        # we mistakenly passed all "model_params" under the options argument
+        # next two lines to be removed in 2.0
+        if not any(
+            key in self.model_params for key in ("options", "format", "keep_alive")
+        ):
+            warnings.warn(
+                """Passing options directly without including them in an 'options' key is deprecated. Ie you must use model_params={"options": {"temperature": 0}}""",
+                DeprecationWarning,
+            )
+            self.model_params = {"options": self.model_params}
 
     def get_messages(
         self,
@@ -104,7 +118,7 @@ def invoke(
         response = self.client.chat(
             model=self.model_name,
             messages=self.get_messages(input, message_history, system_instruction),
-            options=self.model_params,
+            **self.model_params,
         )
         content = response.message.content or ""
         return LLMResponse(content=content)
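For context, a minimal caller-side sketch of the behavior this change establishes. It is not part of the diff; it assumes the OllamaLLM export from neo4j_graphrag.llm, an installed ollama Python package, and an illustrative model name ("llama3"):

from neo4j_graphrag.llm import OllamaLLM

# New form: Ollama-native generation options go under the "options" key,
# while top-level chat arguments such as "format" stay at the top level.
llm = OllamaLLM(
    model_name="llama3",  # illustrative model name, not from the commit
    model_params={"options": {"temperature": 0.0}, "format": "json"},
)

# Deprecated flat form, kept for backward compatibility until 2.0:
# the dict is wrapped into {"options": ...} and a DeprecationWarning is emitted.
legacy_llm = OllamaLLM(model_name="llama3", model_params={"temperature": 0.0})

# Streaming is rejected explicitly by the wrapper.
try:
    OllamaLLM(model_name="llama3", model_params={"stream": True})
except ValueError:
    pass  # "Streaming is not supported by the OllamaLLM wrapper"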

tests/unit/llm/test_ollama_llm.py

Lines changed: 71 additions & 12 deletions
@@ -35,28 +35,79 @@ def test_ollama_llm_missing_dependency(mock_import: Mock) -> None:
 
 
 @patch("builtins.__import__")
-def test_ollama_llm_happy_path(mock_import: Mock) -> None:
+def test_ollama_llm_happy_path_deprecated_options(mock_import: Mock) -> None:
     mock_ollama = get_mock_ollama()
     mock_import.return_value = mock_ollama
     mock_ollama.Client.return_value.chat.return_value = MagicMock(
         message=MagicMock(content="ollama chat response"),
     )
     model = "gpt"
     model_params = {"temperature": 0.3}
+    with pytest.warns(DeprecationWarning) as record:
+        llm = OllamaLLM(
+            model,
+            model_params=model_params,
+        )
+    assert len(record) == 1
+    assert (
+        'you must use model_params={"options": {"temperature": 0}}'
+        in record[0].message.args[0]
+    )
+
+    question = "What is graph RAG?"
+    res = llm.invoke(question)
+    assert isinstance(res, LLMResponse)
+    assert res.content == "ollama chat response"
+    messages = [
+        {"role": "user", "content": question},
+    ]
+    llm.client.chat.assert_called_once_with(  # type: ignore[attr-defined]
+        model=model, messages=messages, options={"temperature": 0.3}
+    )
+
+
+@patch("builtins.__import__")
+def test_ollama_llm_unsupported_streaming(mock_import: Mock) -> None:
+    mock_ollama = get_mock_ollama()
+    mock_import.return_value = mock_ollama
+    mock_ollama.Client.return_value.chat.return_value = MagicMock(
+        message=MagicMock(content="ollama chat response"),
+    )
+    model = "gpt"
+    model_params = {"stream": True}
+    with pytest.raises(ValueError):
+        OllamaLLM(
+            model,
+            model_params=model_params,
+        )
+
+
+@patch("builtins.__import__")
+def test_ollama_llm_happy_path(mock_import: Mock) -> None:
+    mock_ollama = get_mock_ollama()
+    mock_import.return_value = mock_ollama
+    mock_ollama.Client.return_value.chat.return_value = MagicMock(
+        message=MagicMock(content="ollama chat response"),
+    )
+    model = "gpt"
+    options = {"temperature": 0.3}
+    model_params = {"options": options, "format": "json"}
     question = "What is graph RAG?"
     llm = OllamaLLM(
-        model,
+        model_name=model,
         model_params=model_params,
     )
-
     res = llm.invoke(question)
     assert isinstance(res, LLMResponse)
     assert res.content == "ollama chat response"
     messages = [
         {"role": "user", "content": question},
     ]
     llm.client.chat.assert_called_once_with(  # type: ignore[attr-defined]
-        model=model, messages=messages, options=model_params
+        model=model,
+        messages=messages,
+        options=options,
+        format="json",
     )
 
 
@@ -68,7 +119,8 @@ def test_ollama_invoke_with_system_instruction_happy_path(mock_import: Mock) ->
         message=MagicMock(content="ollama chat response"),
     )
     model = "gpt"
-    model_params = {"temperature": 0.3}
+    options = {"temperature": 0.3}
+    model_params = {"options": options, "format": "json"}
     llm = OllamaLLM(
         model,
         model_params=model_params,
@@ -81,7 +133,10 @@ def test_ollama_invoke_with_system_instruction_happy_path(mock_import: Mock) ->
     messages = [{"role": "system", "content": system_instruction}]
     messages.append({"role": "user", "content": question})
     llm.client.chat.assert_called_once_with(  # type: ignore[attr-defined]
-        model=model, messages=messages, options=model_params
+        model=model,
+        messages=messages,
+        options=options,
+        format="json",
     )
 
 
@@ -93,7 +148,8 @@ def test_ollama_invoke_with_message_history_happy_path(mock_import: Mock) -> Non
         message=MagicMock(content="ollama chat response"),
     )
     model = "gpt"
-    model_params = {"temperature": 0.3}
+    options = {"temperature": 0.3}
+    model_params = {"options": options}
     llm = OllamaLLM(
         model,
         model_params=model_params,
@@ -109,7 +165,7 @@ def test_ollama_invoke_with_message_history_happy_path(mock_import: Mock) -> Non
     messages = [m for m in message_history]
     messages.append({"role": "user", "content": question})
     llm.client.chat.assert_called_once_with(  # type: ignore[attr-defined]
-        model=model, messages=messages, options=model_params
+        model=model, messages=messages, options=options
     )
 
 
@@ -123,7 +179,8 @@ def test_ollama_invoke_with_message_history_and_system_instruction(
         message=MagicMock(content="ollama chat response"),
     )
     model = "gpt"
-    model_params = {"temperature": 0.3}
+    options = {"temperature": 0.3}
+    model_params = {"options": options}
     system_instruction = "You are a helpful assistant."
     llm = OllamaLLM(
         model,
@@ -145,7 +202,7 @@ def test_ollama_invoke_with_message_history_and_system_instruction(
     messages.extend(message_history)
     messages.append({"role": "user", "content": question})
     llm.client.chat.assert_called_once_with(  # type: ignore[attr-defined]
-        model=model, messages=messages, options=model_params
+        model=model, messages=messages, options=options
     )
     assert llm.client.chat.call_count == 1  # type: ignore
 
@@ -156,7 +213,8 @@ def test_ollama_invoke_with_message_history_validation_error(mock_import: Mock)
     mock_import.return_value = mock_ollama
     mock_ollama.ResponseError = ollama.ResponseError
     model = "gpt"
-    model_params = {"temperature": 0.3}
+    options = {"temperature": 0.3}
+    model_params = {"options": options}
     system_instruction = "You are a helpful assistant."
     llm = OllamaLLM(
         model,
@@ -187,7 +245,8 @@ async def mock_chat_async(*args: Any, **kwargs: Any) -> MagicMock:
 
     mock_ollama.AsyncClient.return_value.chat = mock_chat_async
     model = "gpt"
-    model_params = {"temperature": 0.3}
+    options = {"temperature": 0.3}
+    model_params = {"options": options}
     question = "What is graph RAG?"
     llm = OllamaLLM(
         model,
