@@ -225,6 +225,7 @@ it can be queried using the following:
225225 from neo4j_graphrag.llm import OllamaLLM
226226 llm = OllamaLLM(
227227 model_name="orca-mini",
228+ # model_params={"options": {"temperature": 0}, "format": "json"},
228229 # host="...", # when using a remote server
229230 )
230231 llm.invoke("say something")
@@ -305,17 +306,17 @@ Default Rate Limit Handler
305306Rate limiting is enabled by default for all LLM instances with the following configuration:
306307
307308- **Max attempts**: 3
308- - **Min wait**: 1.0 seconds
309+ - **Min wait**: 1.0 seconds
309310- **Max wait**: 60.0 seconds
310311- **Multiplier**: 2.0 (exponential backoff)
311312
312313.. code:: python
313314
314315 from neo4j_graphrag.llm import OpenAILLM
315-
316+
316317 # Rate limiting is automatically enabled
317318 llm = OpenAILLM(model_name="gpt-4o")
318-
319+
319320 # The LLM will automatically retry on rate limit errors
320321 response = llm.invoke("Hello, world!")
321322
@@ -327,7 +328,7 @@ Rate limiting is enabled by default for all LLM instances with the following con
327328
328329 from neo4j_graphrag.llm import OpenAILLM
329330 from neo4j_graphrag.llm.rate_limit import RetryRateLimitHandler
330-
331+
331332 # Customize rate limiting parameters
332333 llm = OpenAILLM(
333334 model_name="gpt-4o",
@@ -348,15 +349,15 @@ You can customize the rate limiting behavior by creating your own rate limit han
348349
349350 from neo4j_graphrag.llm import AnthropicLLM
350351 from neo4j_graphrag.llm.rate_limit import RateLimitHandler
351-
352+
352353 class CustomRateLimitHandler (RateLimitHandler ):
353354 """Implement your custom rate limiting strategy."""
354355 # Implement required methods: handle_sync, handle_async
355356 pass
356-
357+
357358 # Create custom rate limit handler and pass it to the LLM interface
358359 custom_handler = CustomRateLimitHandler()
359-
360+
360361 llm = AnthropicLLM(
361362 model_name="claude-3-sonnet-20240229",
362363 rate_limit_handler=custom_handler,
@@ -370,7 +371,7 @@ For high-throughput applications or when you handle rate limiting externally, yo
370371.. code:: python
371372
372373 from neo4j_graphrag.llm import CohereLLM, NoOpRateLimitHandler
373-
374+
374375 # Disable rate limiting completely
375376 llm = CohereLLM(
376377 model_name="command-r-plus",
0 commit comments