@@ -225,6 +225,7 @@ it can be queried using the following:
225225 from neo4j_graphrag.llm import OllamaLLM
226226 llm = OllamaLLM(
227227 model_name="orca-mini",
228+ # model_params={"options": {"temperature": 0}, "format": "json"},
228229 # host="...", # when using a remote server
229230 )
230231 llm.invoke("say something")
@@ -305,17 +306,17 @@ Default Rate Limit Handler
305306Rate limiting is enabled by default for all LLM instances with the following configuration:
306307
307308- **Max attempts**: 3
308- - **Min wait**: 1.0 seconds
309+ - **Min wait**: 1.0 seconds
309310- **Max wait**: 60.0 seconds
310311- **Multiplier**: 2.0 (exponential backoff)
311312
312313.. code:: python
313314
314315 from neo4j_graphrag.llm import OpenAILLM
315-
316+
316317 # Rate limiting is automatically enabled
317318 llm = OpenAILLM(model_name="gpt-4o")
318-
319+
319320 # The LLM will automatically retry on rate limit errors
320321 response = llm.invoke("Hello, world!")
321322
@@ -327,7 +328,7 @@ Rate limiting is enabled by default for all LLM instances with the following con
327328
328329 from neo4j_graphrag.llm import OpenAILLM
329330 from neo4j_graphrag.llm.rate_limit import RetryRateLimitHandler
330-
331+
331332 # Customize rate limiting parameters
332333 llm = OpenAILLM(
333334 model_name="gpt-4o",
@@ -348,15 +349,15 @@ You can customize the rate limiting behavior by creating your own rate limit han
348349
349350 from neo4j_graphrag.llm import AnthropicLLM
350351 from neo4j_graphrag.llm.rate_limit import RateLimitHandler
351-
352+
352353 class CustomRateLimitHandler (RateLimitHandler ):
353354 """Implement your custom rate limiting strategy."""
354355 # Implement required methods: handle_sync, handle_async
355356 pass
356-
357+
357358 # Create custom rate limit handler and pass it to the LLM interface
358359 custom_handler = CustomRateLimitHandler()
359-
360+
360361 llm = AnthropicLLM(
361362 model_name="claude-3-sonnet-20240229",
362363 rate_limit_handler=custom_handler,
@@ -370,7 +371,7 @@ For high-throughput applications or when you handle rate limiting externally, yo
370371.. code:: python
371372
372373 from neo4j_graphrag.llm import CohereLLM, NoOpRateLimitHandler
373-
374+
374375 # Disable rate limiting completely
375376 llm = CohereLLM(
376377 model_name="command-r-plus",
0 commit comments