From 4d72db6b091300bdff7147d6abf5d720a1c8ffa4 Mon Sep 17 00:00:00 2001 From: David Condrey Date: Tue, 4 Nov 2025 17:45:16 -0800 Subject: [PATCH 1/2] feat: generate final answer when max_agent_steps reached --- haystack/components/agents/agent.py | 99 +++++++++++++++++++ ...-answer-on-max-steps-a1b2c3d4e5f6g7h8.yaml | 8 ++ test/components/agents/test_agent.py | 66 +++++++++++++ 3 files changed, 173 insertions(+) create mode 100644 releasenotes/notes/agent-final-answer-on-max-steps-a1b2c3d4e5f6g7h8.yaml diff --git a/haystack/components/agents/agent.py b/haystack/components/agents/agent.py index b5e5b1a1e4..45e2641e12 100644 --- a/haystack/components/agents/agent.py +++ b/haystack/components/agents/agent.py @@ -155,6 +155,7 @@ def __init__( exit_conditions: Optional[list[str]] = None, state_schema: Optional[dict[str, Any]] = None, max_agent_steps: int = 100, + final_answer_on_max_steps: bool = True, streaming_callback: Optional[StreamingCallbackT] = None, raise_on_tool_invocation_failure: bool = False, tool_invoker_kwargs: Optional[dict[str, Any]] = None, @@ -171,6 +172,10 @@ def __init__( :param state_schema: The schema for the runtime state used by the tools. :param max_agent_steps: Maximum number of steps the agent will run before stopping. Defaults to 100. If the agent exceeds this number of steps, it will stop and return the current state. + :param final_answer_on_max_steps: If True, generates a final text response when max_agent_steps + is reached and the last message is a tool result. This ensures the agent always returns a + natural language response instead of raw tool output. Adds one additional LLM call that doesn't + count toward max_agent_steps. Defaults to True. :param streaming_callback: A callback that will be invoked when a response is streamed from the LLM. The same callback can be configured to emit tool results when a tool is called. :param raise_on_tool_invocation_failure: Should the agent raise an exception when a tool invocation fails? @@ -213,6 +218,7 @@ def __init__( self.system_prompt = system_prompt self.exit_conditions = exit_conditions self.max_agent_steps = max_agent_steps + self.final_answer_on_max_steps = final_answer_on_max_steps self.raise_on_tool_invocation_failure = raise_on_tool_invocation_failure self.streaming_callback = streaming_callback @@ -520,6 +526,95 @@ def _check_tool_invoker_breakpoint( llm_messages=execution_context.state.data["messages"][-1:], pipeline_snapshot=pipeline_snapshot ) + def _generate_final_answer(self, exe_context: _ExecutionContext, span) -> None: + """Generate a final text response when max steps is reached with a tool result as last message.""" + if not self.final_answer_on_max_steps or not exe_context.state.data.get("messages"): + return + + last_msg = exe_context.state.data["messages"][-1] + if not last_msg.tool_call_result: + return + + try: + logger.info("Generating final text response after max steps reached.") + + # Add system message for context + final_prompt = ChatMessage.from_system( + "You have reached the maximum number of reasoning steps. " + "Based on the information gathered so far, provide a final answer " + "to the user's question. Tools are no longer available." 
+ ) + + # Make final call with tools disabled + final_inputs = {k: v for k, v in exe_context.chat_generator_inputs.items() if k != 'tools'} + final_result = self.chat_generator.run( + messages=exe_context.state.data["messages"] + [final_prompt], + tools=[], + **final_inputs + ) + + # Append final response + if final_result and "replies" in final_result: + for msg in final_result["replies"]: + exe_context.state.data["messages"].append(msg) + + span.set_tag("haystack.agent.final_answer_generated", True) + + except Exception as e: + logger.warning( + "Failed to generate final answer: {error}. Returning with tool result as last message.", + error=str(e) + ) + span.set_tag("haystack.agent.final_answer_failed", True) + + async def _generate_final_answer_async(self, exe_context: _ExecutionContext, span) -> None: + """Async version: Generate a final text response when max steps is reached with a tool result as last message.""" + if not self.final_answer_on_max_steps or not exe_context.state.data.get("messages"): + return + + last_msg = exe_context.state.data["messages"][-1] + if not last_msg.tool_call_result: + return + + try: + logger.info("Generating final text response after max steps reached.") + + # Add system message for context + final_prompt = ChatMessage.from_system( + "You have reached the maximum number of reasoning steps. " + "Based on the information gathered so far, provide a final answer " + "to the user's question. Tools are no longer available." + ) + + # Make final call with tools disabled using AsyncPipeline + final_inputs = {k: v for k, v in exe_context.chat_generator_inputs.items() if k != 'tools'} + final_inputs["tools"] = [] + + final_result = await AsyncPipeline._run_component_async( + component_name="chat_generator", + component={"instance": self.chat_generator}, + component_inputs={ + "messages": exe_context.state.data["messages"] + [final_prompt], + **final_inputs, + }, + component_visits=exe_context.component_visits, + parent_span=span, + ) + + # Append final response + if final_result and "replies" in final_result: + for msg in final_result["replies"]: + exe_context.state.data["messages"].append(msg) + + span.set_tag("haystack.agent.final_answer_generated", True) + + except Exception as e: + logger.warning( + "Failed to generate final answer: {error}. 
Returning with tool result as last message.", + error=str(e) + ) + span.set_tag("haystack.agent.final_answer_failed", True) + def run( # noqa: PLR0915 self, messages: list[ChatMessage], @@ -677,6 +772,8 @@ def run( # noqa: PLR0915 "Agent reached maximum agent steps of {max_agent_steps}, stopping.", max_agent_steps=self.max_agent_steps, ) + self._generate_final_answer(exe_context, span) + span.set_content_tag("haystack.agent.output", exe_context.state.data) span.set_tag("haystack.agent.steps_taken", exe_context.counter) @@ -820,6 +917,8 @@ async def run_async( "Agent reached maximum agent steps of {max_agent_steps}, stopping.", max_agent_steps=self.max_agent_steps, ) + await self._generate_final_answer_async(exe_context, span) + span.set_content_tag("haystack.agent.output", exe_context.state.data) span.set_tag("haystack.agent.steps_taken", exe_context.counter) diff --git a/releasenotes/notes/agent-final-answer-on-max-steps-a1b2c3d4e5f6g7h8.yaml b/releasenotes/notes/agent-final-answer-on-max-steps-a1b2c3d4e5f6g7h8.yaml new file mode 100644 index 0000000000..fa3afa5edf --- /dev/null +++ b/releasenotes/notes/agent-final-answer-on-max-steps-a1b2c3d4e5f6g7h8.yaml @@ -0,0 +1,8 @@ +--- +enhancements: + - | + Add `final_answer_on_max_steps` parameter to Agent component. When enabled (default: True), + the agent will generate a final natural language response if it reaches max_agent_steps with + a tool result as the last message. This ensures the agent always returns a user-friendly text + response instead of raw tool output, improving user experience when step limits are reached. + The feature adds one additional LLM call that doesn't count toward max_agent_steps. diff --git a/test/components/agents/test_agent.py b/test/components/agents/test_agent.py index e896b715dd..ae35e5b403 100644 --- a/test/components/agents/test_agent.py +++ b/test/components/agents/test_agent.py @@ -741,6 +741,72 @@ def test_exceed_max_steps(self, monkeypatch, weather_tool, caplog): agent.run([ChatMessage.from_user("Hello")]) assert "Agent reached maximum agent steps" in caplog.text + def test_final_answer_on_max_steps_enabled(self, monkeypatch, weather_tool): + """Test that final answer is generated when max steps is reached with tool result as last message.""" + monkeypatch.setenv("OPENAI_API_KEY", "fake-key") + generator = OpenAIChatGenerator() + + tool_result_msg = ChatMessage.from_tool( + tool_result="Weather in Berlin: 20C", + origin=ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"}), + ) + + # Mock responses: first returns tool call, then after tools run, we hit max steps + agent = Agent(chat_generator=generator, tools=[weather_tool], max_agent_steps=1, final_answer_on_max_steps=True) + agent.warm_up() + + call_count = 0 + + def mock_run(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + # First call: LLM wants to call tool + return {"replies": [ChatMessage.from_assistant( + tool_calls=[ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"})] + )]} + else: + # Final answer call (no tools available) + return {"replies": [ChatMessage.from_assistant("Based on the weather data, it's 20C in Berlin.")]} + + agent.chat_generator.run = mock_run + + result = agent.run([ChatMessage.from_user("What's the weather in Berlin?")]) + + # Last message should be text response, not tool result + assert result["last_message"].text + assert "Berlin" in result["last_message"].text + + def test_final_answer_on_max_steps_disabled(self, monkeypatch, weather_tool): + """Test that 
no final answer is generated when final_answer_on_max_steps=False.""" + monkeypatch.setenv("OPENAI_API_KEY", "fake-key") + generator = OpenAIChatGenerator() + + agent = Agent( + chat_generator=generator, + tools=[weather_tool], + max_agent_steps=1, + final_answer_on_max_steps=False + ) + agent.warm_up() + + call_count = 0 + + def mock_run(*args, **kwargs): + nonlocal call_count + call_count += 1 + # Always return tool call to ensure we'd end with tool result + return {"replies": [ChatMessage.from_assistant( + tool_calls=[ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"})] + )]} + + agent.chat_generator.run = mock_run + + result = agent.run([ChatMessage.from_user("What's the weather?")]) + + # Should have ended without final answer call (only 1 LLM call, not 2) + assert call_count == 1 + def test_exit_conditions_checked_across_all_llm_messages(self, monkeypatch, weather_tool): monkeypatch.setenv("OPENAI_API_KEY", "fake-key") generator = OpenAIChatGenerator() From d2dcf7a3faffe7706cf3df17521e9edf71b2fb9c Mon Sep 17 00:00:00 2001 From: David Condrey Date: Tue, 4 Nov 2025 18:07:47 -0800 Subject: [PATCH 2/2] chore: Run pre-commit hooks --- haystack/components/agents/agent.py | 23 ++++++++------------ test/components/agents/test_agent.py | 32 ++++++++++++++-------------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/haystack/components/agents/agent.py b/haystack/components/agents/agent.py index 45e2641e12..e0ccab28f5 100644 --- a/haystack/components/agents/agent.py +++ b/haystack/components/agents/agent.py @@ -546,11 +546,9 @@ def _generate_final_answer(self, exe_context: _ExecutionContext, span) -> None: ) # Make final call with tools disabled - final_inputs = {k: v for k, v in exe_context.chat_generator_inputs.items() if k != 'tools'} + final_inputs = {k: v for k, v in exe_context.chat_generator_inputs.items() if k != "tools"} final_result = self.chat_generator.run( - messages=exe_context.state.data["messages"] + [final_prompt], - tools=[], - **final_inputs + messages=exe_context.state.data["messages"] + [final_prompt], tools=[], **final_inputs ) # Append final response @@ -562,13 +560,14 @@ def _generate_final_answer(self, exe_context: _ExecutionContext, span) -> None: except Exception as e: logger.warning( - "Failed to generate final answer: {error}. Returning with tool result as last message.", - error=str(e) + "Failed to generate final answer: {error}. Returning with tool result as last message.", error=str(e) ) span.set_tag("haystack.agent.final_answer_failed", True) async def _generate_final_answer_async(self, exe_context: _ExecutionContext, span) -> None: - """Async version: Generate a final text response when max steps is reached with a tool result as last message.""" + """ + Async version: Generate a final text response when max steps is reached with tool result as last message. 
+ """ if not self.final_answer_on_max_steps or not exe_context.state.data.get("messages"): return @@ -587,16 +586,13 @@ async def _generate_final_answer_async(self, exe_context: _ExecutionContext, spa ) # Make final call with tools disabled using AsyncPipeline - final_inputs = {k: v for k, v in exe_context.chat_generator_inputs.items() if k != 'tools'} + final_inputs = {k: v for k, v in exe_context.chat_generator_inputs.items() if k != "tools"} final_inputs["tools"] = [] final_result = await AsyncPipeline._run_component_async( component_name="chat_generator", component={"instance": self.chat_generator}, - component_inputs={ - "messages": exe_context.state.data["messages"] + [final_prompt], - **final_inputs, - }, + component_inputs={"messages": exe_context.state.data["messages"] + [final_prompt], **final_inputs}, component_visits=exe_context.component_visits, parent_span=span, ) @@ -610,8 +606,7 @@ async def _generate_final_answer_async(self, exe_context: _ExecutionContext, spa except Exception as e: logger.warning( - "Failed to generate final answer: {error}. Returning with tool result as last message.", - error=str(e) + "Failed to generate final answer: {error}. Returning with tool result as last message.", error=str(e) ) span.set_tag("haystack.agent.final_answer_failed", True) diff --git a/test/components/agents/test_agent.py b/test/components/agents/test_agent.py index ae35e5b403..a7cb715118 100644 --- a/test/components/agents/test_agent.py +++ b/test/components/agents/test_agent.py @@ -746,11 +746,6 @@ def test_final_answer_on_max_steps_enabled(self, monkeypatch, weather_tool): monkeypatch.setenv("OPENAI_API_KEY", "fake-key") generator = OpenAIChatGenerator() - tool_result_msg = ChatMessage.from_tool( - tool_result="Weather in Berlin: 20C", - origin=ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"}), - ) - # Mock responses: first returns tool call, then after tools run, we hit max steps agent = Agent(chat_generator=generator, tools=[weather_tool], max_agent_steps=1, final_answer_on_max_steps=True) agent.warm_up() @@ -762,9 +757,13 @@ def mock_run(*args, **kwargs): call_count += 1 if call_count == 1: # First call: LLM wants to call tool - return {"replies": [ChatMessage.from_assistant( - tool_calls=[ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"})] - )]} + return { + "replies": [ + ChatMessage.from_assistant( + tool_calls=[ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"})] + ) + ] + } else: # Final answer call (no tools available) return {"replies": [ChatMessage.from_assistant("Based on the weather data, it's 20C in Berlin.")]} @@ -783,10 +782,7 @@ def test_final_answer_on_max_steps_disabled(self, monkeypatch, weather_tool): generator = OpenAIChatGenerator() agent = Agent( - chat_generator=generator, - tools=[weather_tool], - max_agent_steps=1, - final_answer_on_max_steps=False + chat_generator=generator, tools=[weather_tool], max_agent_steps=1, final_answer_on_max_steps=False ) agent.warm_up() @@ -796,13 +792,17 @@ def mock_run(*args, **kwargs): nonlocal call_count call_count += 1 # Always return tool call to ensure we'd end with tool result - return {"replies": [ChatMessage.from_assistant( - tool_calls=[ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"})] - )]} + return { + "replies": [ + ChatMessage.from_assistant( + tool_calls=[ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"})] + ) + ] + } agent.chat_generator.run = mock_run - result = agent.run([ChatMessage.from_user("What's the 
weather?")]) + agent.run([ChatMessage.from_user("What's the weather?")]) # Should have ended without final answer call (only 1 LLM call, not 2) assert call_count == 1