From a7ab4f9e22a696b497f14384dbada3871f097aef Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 8 Oct 2025 11:13:58 +0200 Subject: [PATCH 1/5] Do not force users to implement _invoke/_ainvoke to prevent breaking changes + add deprecation warning --- docs/source/user_guide_rag.rst | 12 ++++-------- src/neo4j_graphrag/llm/base.py | 28 ++++++++++++++++++++++++---- tests/unit/llm/test_base.py | 17 +++++++++++------ 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/docs/source/user_guide_rag.rst b/docs/source/user_guide_rag.rst index ce0e4fc8..22233f5b 100644 --- a/docs/source/user_guide_rag.rst +++ b/docs/source/user_guide_rag.rst @@ -265,21 +265,17 @@ Here's an example using the Python Ollama client: import ollama from neo4j_graphrag.llm import LLMInterface, LLMResponse + from neo4j_graphrag.types import LLMMessage class OllamaLLM(LLMInterface): - def invoke(self, input: str) -> LLMResponse: - response = ollama.chat(model=self.model_name, messages=[ - { - 'role': 'user', - 'content': input, - }, - ]) + def _invoke(self, input: list[LLMMessage]) -> LLMResponse: + response = ollama.chat(model=self.model_name, messages=input) return LLMResponse( content=response["message"]["content"] ) - async def ainvoke(self, input: str) -> LLMResponse: + async def _ainvoke(self, input: list[LLMMessage]) -> LLMResponse: return self.invoke(input) # TODO: implement async with ollama.AsyncClient diff --git a/src/neo4j_graphrag/llm/base.py b/src/neo4j_graphrag/llm/base.py index b66713b2..92d3eb84 100644 --- a/src/neo4j_graphrag/llm/base.py +++ b/src/neo4j_graphrag/llm/base.py @@ -14,7 +14,7 @@ # limitations under the License. from __future__ import annotations -from abc import ABC, abstractmethod +import warnings from typing import Any, List, Optional, Sequence, Union from pydantic import ValidationError @@ -36,7 +36,7 @@ from ..exceptions import LLMGenerationError -class LLMInterface(ABC): +class LLMInterface: """Interface for large language models. 
Args: @@ -68,6 +68,16 @@ def invoke( message_history: Optional[Union[List[LLMMessage], MessageHistory]] = None, system_instruction: Optional[str] = None, ) -> LLMResponse: + if message_history: + warnings.warn( + "Using 'message_history' in the llm.invoke method is deprecated. Please use invoke(list[LLMMessage]) instead.", + DeprecationWarning, + ) + if system_instruction: + warnings.warn( + "Using 'system_instruction' in the llm.invoke method is deprecated. Please use invoke(list[LLMMessage]) instead.", + DeprecationWarning, + ) try: messages = legacy_inputs_to_messages( input, message_history, system_instruction @@ -76,7 +86,6 @@ def invoke( raise LLMGenerationError("Input validation failed") from e return self._invoke(messages) - @abstractmethod def _invoke( self, input: list[LLMMessage], @@ -92,6 +101,7 @@ def _invoke( Raises: LLMGenerationError: If anything goes wrong. """ + raise NotImplementedError() @async_rate_limit_handler async def ainvoke( @@ -100,10 +110,19 @@ async def ainvoke( message_history: Optional[Union[List[LLMMessage], MessageHistory]] = None, system_instruction: Optional[str] = None, ) -> LLMResponse: + if message_history: + warnings.warn( + "Using 'message_history' in the llm.ainvoke method is deprecated. Please use ainvoke(list[LLMMessage]) instead.", + DeprecationWarning, + ) + if system_instruction: + warnings.warn( + "Using 'system_instruction' in the llm.ainvoke method is deprecated. Please use ainvoke(list[LLMMessage]) instead.", + DeprecationWarning, + ) messages = legacy_inputs_to_messages(input, message_history, system_instruction) return await self._ainvoke(messages) - @abstractmethod async def _ainvoke( self, input: list[LLMMessage], @@ -119,6 +138,7 @@ async def _ainvoke( Raises: LLMGenerationError: If anything goes wrong. 
""" + raise NotImplementedError() @rate_limit_handler def invoke_with_tools( diff --git a/tests/unit/llm/test_base.py b/tests/unit/llm/test_base.py index 4eff7cb9..695cad9f 100644 --- a/tests/unit/llm/test_base.py +++ b/tests/unit/llm/test_base.py @@ -20,16 +20,11 @@ @fixture(scope="module") # type: ignore[misc] def llm_interface() -> Generator[Type[LLMInterface], None, None]: - real_abstract_methods = LLMInterface.__abstractmethods__ - LLMInterface.__abstractmethods__ = frozenset() - class CustomLLMInterface(LLMInterface): pass yield CustomLLMInterface - LLMInterface.__abstractmethods__ = real_abstract_methods - @patch("neo4j_graphrag.llm.base.legacy_inputs_to_messages") def test_base_llm_interface_invoke_with_input_as_str( @@ -52,7 +47,8 @@ def test_base_llm_interface_invoke_with_input_as_str( system_instruction = "You are a genius." with patch.object(llm, "_invoke") as mock_invoke: - llm.invoke(question, message_history, system_instruction) + with pytest.warns(DeprecationWarning) as record: + llm.invoke(question, message_history, system_instruction) mock_invoke.assert_called_once_with( [ LLMMessage( @@ -66,6 +62,15 @@ def test_base_llm_interface_invoke_with_input_as_str( message_history, system_instruction, ) + assert len(record) == 2 + assert ( + "Using 'message_history' in the llm.invoke method is deprecated" + in record[0].message.args[0] # type: ignore[union-attr] + ) + assert ( + "Using 'system_instruction' in the llm.invoke method is deprecated" + in record[1].message.args[0] # type: ignore[union-attr] + ) @patch("neo4j_graphrag.llm.base.legacy_inputs_to_messages") From fcc99e07ab302d7fa4fe99a785420415a5f70685 Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 8 Oct 2025 13:42:25 +0200 Subject: [PATCH 2/5] Update Embedder for the same reason --- examples/customize/embeddings/custom_embeddings.py | 1 + src/neo4j_graphrag/embeddings/base.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/examples/customize/embeddings/custom_embeddings.py b/examples/customize/embeddings/custom_embeddings.py index e7712735..6eac1777 100644 --- a/examples/customize/embeddings/custom_embeddings.py +++ b/examples/customize/embeddings/custom_embeddings.py @@ -6,6 +6,7 @@ class CustomEmbeddings(Embedder): def __init__(self, dimension: int = 10, **kwargs: Any): + super().__init__(**kwargs) self.dimension = dimension def _embed_query(self, input: str) -> list[float]: diff --git a/src/neo4j_graphrag/embeddings/base.py b/src/neo4j_graphrag/embeddings/base.py index 02e5b7a5..1b261568 100644 --- a/src/neo4j_graphrag/embeddings/base.py +++ b/src/neo4j_graphrag/embeddings/base.py @@ -14,7 +14,6 @@ # limitations under the License. from __future__ import annotations -from abc import ABC, abstractmethod from typing import Optional from neo4j_graphrag.utils.rate_limit import ( @@ -24,7 +23,7 @@ ) -class Embedder(ABC): +class Embedder: """ Interface for embedding models. An embedder passed into a retriever must implement this interface. @@ -51,7 +50,6 @@ def embed_query(self, text: str) -> list[float]: """ return self._embed_query(text) - @abstractmethod def _embed_query(self, text: str) -> list[float]: """Embed query text. @@ -61,3 +59,4 @@ def _embed_query(self, text: str) -> list[float]: Returns: list[float]: A vector embedding. """ + raise NotImplementedError() From af966cc6a0cfb0e5f6a43f5808affc523fe16ff9 Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 8 Oct 2025 13:43:04 +0200 Subject: [PATCH 3/5] Doc update --- docs/source/api.rst | 7 +++++++ docs/source/types.rst | 24 ++++++++++++++++++++++++ docs/source/user_guide_rag.rst | 4 ++++ 3 files changed, 35 insertions(+) diff --git a/docs/source/api.rst b/docs/source/api.rst index f891b4de..ee5b5c6a 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -214,6 +214,13 @@ Text2CypherRetriever :members: search +ToolsRetriever +============== + +.. 
autoclass:: neo4j_graphrag.retrievers.ToolsRetriever + :members: search + + ******************* External Retrievers ******************* diff --git a/docs/source/types.rst b/docs/source/types.rst index 267e310d..bbd346fc 100644 --- a/docs/source/types.rst +++ b/docs/source/types.rst @@ -32,6 +32,30 @@ LLMMessage =========== .. autoclass:: neo4j_graphrag.types.LLMMessage + :members: + :undoc-members: + +Tool +==== + +.. autoclass:: neo4j_graphrag.tool.Tool + + +ToolParameter +============= + +.. autoclass:: neo4j_graphrag.tool.ToolParameter + + +ObjectParameter +=============== + +.. autoclass:: neo4j_graphrag.tool.ObjectParameter + +ParameterType +============= + +.. autoenum:: neo4j_graphrag.tool.ParameterType RagResultModel diff --git a/docs/source/user_guide_rag.rst b/docs/source/user_guide_rag.rst index 22233f5b..ea412f2a 100644 --- a/docs/source/user_guide_rag.rst +++ b/docs/source/user_guide_rag.rst @@ -253,6 +253,10 @@ Its interface is compatible with our `GraphRAG` interface, facilitating integrat It is however not mandatory to use LangChain. +.. warning:: ToolsRetriever + + LangChain models are not compatible with the :ref:`toolsretriever`. 
+ Using a Custom Model -------------------- From cd97efc99781d02664a09942e81fdab3a8786ed0 Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 8 Oct 2025 15:37:31 +0200 Subject: [PATCH 4/5] Also free disk space in PR e2e tests --- .github/workflows/pr-e2e-tests.yaml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-e2e-tests.yaml b/.github/workflows/pr-e2e-tests.yaml index 3baccb2d..9fd5304d 100644 --- a/.github/workflows/pr-e2e-tests.yaml +++ b/.github/workflows/pr-e2e-tests.yaml @@ -51,10 +51,17 @@ jobs: steps: - name: Check out repository code uses: actions/checkout@v4 - - name: Docker Prune + - name: Free up disk space (ubuntu-latest) run: | - docker system prune -af - docker volume prune -f + sudo rm -rf /usr/local/lib/android \ + /usr/share/dotnet \ + /opt/ghc \ + /opt/hostedtoolcache + docker system prune -af || true + docker volume prune -f || true + docker builder prune -af || true + sudo apt-get clean || true + df -h - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: From c52097877268d57b7017a4f88e8df3b8e208edd6 Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 8 Oct 2025 16:01:26 +0200 Subject: [PATCH 5/5] Document metadata can have other types than str --- src/neo4j_graphrag/experimental/components/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neo4j_graphrag/experimental/components/types.py b/src/neo4j_graphrag/experimental/components/types.py index b5ce0770..3de4c68d 100644 --- a/src/neo4j_graphrag/experimental/components/types.py +++ b/src/neo4j_graphrag/experimental/components/types.py @@ -36,7 +36,7 @@ class DocumentInfo(DataModel): """ path: str - metadata: Optional[Dict[str, str]] = None + metadata: Optional[Dict[str, Any]] = None uid: str = Field(default_factory=lambda: str(uuid.uuid4())) document_type: Optional[str] = None