Commit a24c087

fix: #886
1 parent 6207ac6 commit a24c087

6 files changed: +3047 additions, -2697 deletions


pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 6 additions & 0 deletions

@@ -792,6 +792,12 @@ class ModelResponse:
     vendor_id: str | None = None
     """Vendor ID as specified by the model provider. This can be used to track the specific request to the model."""
 
+    id: str | None = None
+    """Response ID as specified by the model provider. Used to populate gen_ai.response.id in OpenTelemetry."""
+
+    finish_reason: str | None = None
+    """Reason the model finished generating the response. Used to populate gen_ai.response.finish_reasons in OpenTelemetry."""
+
     def otel_events(self, settings: InstrumentationSettings) -> list[Event]:
         """Return OpenTelemetry events for the response."""
         result: list[Event] = []
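
Both new fields default to None, so existing ModelResponse construction sites keep working unchanged. A minimal sketch of the fields in use (the ID and finish reason values here are hypothetical):

    from pydantic_ai.messages import ModelResponse, TextPart

    # Both fields are optional; anything not set stays None.
    response = ModelResponse(
        parts=[TextPart(content='hello')],
        model_name='gpt-4o',
        id='chatcmpl-abc123',  # hypothetical provider-assigned response ID
        finish_reason='stop',
    )
    assert response.id == 'chatcmpl-abc123'
    assert response.finish_reason == 'stop'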

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -503,6 +503,8 @@ class StreamedResponse(ABC):
     _parts_manager: ModelResponsePartsManager = field(default_factory=ModelResponsePartsManager, init=False)
     _event_iterator: AsyncIterator[ModelResponseStreamEvent] | None = field(default=None, init=False)
     _usage: Usage = field(default_factory=Usage, init=False)
+    _id: str | None = field(default=None, init=False)
+    _finish_reason: str | None = field(default=None, init=False)
 
     def __aiter__(self) -> AsyncIterator[ModelResponseStreamEvent]:
         """Stream the response as an async iterable of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s."""
@@ -530,6 +532,8 @@ def get(self) -> ModelResponse:
             model_name=self.model_name,
             timestamp=self.timestamp,
             usage=self.usage(),
+            id=self._id,
+            finish_reason=self._finish_reason,
         )
 
     def usage(self) -> Usage:
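
Concrete StreamedResponse subclasses are expected to fill _id and _finish_reason while iterating over provider chunks; get() then copies whatever was captured onto the final ModelResponse, leaving both None if the provider never reported them. A runnable sketch of the intended first-write-wins pattern, mirroring the OpenAI implementations later in this commit but using a made-up chunk type in place of a real provider stream:

    import asyncio
    from dataclasses import dataclass

    @dataclass
    class FakeChunk:  # stand-in for a provider streaming chunk
        id: str | None = None
        finish_reason: str | None = None

    async def capture(chunks: list[FakeChunk]) -> tuple[str | None, str | None]:
        _id: str | None = None
        _finish_reason: str | None = None
        for chunk in chunks:
            if chunk.id and _id is None:  # keep the first ID seen
                _id = chunk.id
            if chunk.finish_reason and _finish_reason is None:
                _finish_reason = chunk.finish_reason
        return _id, _finish_reason

    chunks = [FakeChunk(id='r1'), FakeChunk(id='r1'), FakeChunk(id='r1', finish_reason='stop')]
    assert asyncio.run(capture(chunks)) == ('r1', 'stop')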

pydantic_ai_slim/pydantic_ai/models/instrumented.py

Lines changed: 16 additions & 11 deletions

@@ -301,22 +301,27 @@ def _record_metrics():
 
         events = self.instrumentation_settings.messages_to_otel_events(messages)
         for event in self.instrumentation_settings.messages_to_otel_events([response]):
+            choice_body: dict[str, Any] = {
+                'index': 0,
+                'message': event.body,
+            }
+            if response.finish_reason is not None:
+                choice_body['finish_reason'] = response.finish_reason
             events.append(
                 Event(
                     'gen_ai.choice',
-                    body={
-                        # TODO finish_reason
-                        'index': 0,
-                        'message': event.body,
-                    },
+                    body=choice_body,
                 )
             )
-        span.set_attributes(
-            {
-                **response.usage.opentelemetry_attributes(),
-                'gen_ai.response.model': response_model,
-            }
-        )
+        response_attributes = {
+            **response.usage.opentelemetry_attributes(),
+            'gen_ai.response.model': response_model,
+        }
+        if response.id is not None:
+            response_attributes['gen_ai.response.id'] = response.id
+        if response.finish_reason is not None:
+            response_attributes['gen_ai.response.finish_reasons'] = [response.finish_reason]
+        span.set_attributes(response_attributes)
         span.update_name(f'{operation} {request_model}')
        for event in events:
             event.attributes = {
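
With these guards in place, a finished request span carries the response metadata next to the usage attributes. Note that the OpenTelemetry GenAI semantic conventions define gen_ai.response.finish_reasons as a string array, hence the single-element list. Illustratively, the span attributes now look roughly like this (values are hypothetical; the usage keys come from Usage.opentelemetry_attributes() and are elided):

    # Hypothetical span attributes after this change:
    {
        'gen_ai.response.model': 'gpt-4o-123',
        'gen_ai.response.id': 'chatcmpl-abc123',
        'gen_ai.response.finish_reasons': ['stop'],
        # ...plus token-usage attributes from Usage.opentelemetry_attributes()
    }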

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 20 additions & 1 deletion

@@ -420,6 +420,8 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
             timestamp=timestamp,
             vendor_details=vendor_details,
             vendor_id=response.id,
+            id=response.id,
+            finish_reason=choice.finish_reason,
         )
 
     async def _process_streamed_response(self, response: AsyncStream[ChatCompletionChunk]) -> OpenAIStreamedResponse:
@@ -708,6 +710,8 @@ def _process_response(self, response: responses.Response) -> ModelResponse:
             model_name=response.model,
             vendor_id=response.id,
             timestamp=timestamp,
+            id=response.id,
+            finish_reason=response.status,
         )
 
     async def _process_streamed_response(
@@ -1015,11 +1019,19 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
         async for chunk in self._response:
             self._usage += _map_usage(chunk)
 
+            # Capture the response ID from the chunk
+            if chunk.id and self._id is None:
+                self._id = chunk.id
+
             try:
                 choice = chunk.choices[0]
             except IndexError:
                 continue
 
+            # Capture the finish_reason when it becomes available
+            if choice.finish_reason and self._finish_reason is None:
+                self._finish_reason = choice.finish_reason
+
             # Handle the text part of the response
             content = choice.delta.content
             if content:
@@ -1068,6 +1080,11 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
         async for chunk in self._response:
             if isinstance(chunk, responses.ResponseCompletedEvent):
                 self._usage += _map_usage(chunk.response)
+                # Capture id and finish_reason from completed response
+                if chunk.response.id and self._id is None:
+                    self._id = chunk.response.id
+                if chunk.response.status and self._finish_reason is None:
+                    self._finish_reason = chunk.response.status
 
             elif isinstance(chunk, responses.ResponseContentPartAddedEvent):
                 pass  # there's nothing we need to do here
@@ -1076,7 +1093,9 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                 pass  # there's nothing we need to do here
 
             elif isinstance(chunk, responses.ResponseCreatedEvent):
-                pass  # there's nothing we need to do here
+                # Capture id from created response
+                if chunk.response.id and self._id is None:
+                    self._id = chunk.response.id
 
             elif isinstance(chunk, responses.ResponseFailedEvent):  # pragma: no cover
                 self._usage += _map_usage(chunk.response)
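
Two details are worth noting in the streaming paths: in a Chat Completions stream every chunk repeats the same response ID while finish_reason only appears on the final chunk, and in the Responses API the ID is available on both the created and completed events, with the status only on the completed one. The `is None` guards make every capture first-write-wins, so later events cannot overwrite an already-recorded value. A condensed, runnable sketch of the Responses-API path with stand-in event tuples (the real objects are openai.types.responses events):

    # Stand-in (kind, id, status) tuples for created/completed events.
    events = [
        ('created', 'resp-123', None),
        ('completed', 'resp-123', 'completed'),
    ]

    _id: str | None = None
    _finish_reason: str | None = None
    for kind, event_id, status in events:
        if kind in ('created', 'completed') and event_id and _id is None:
            _id = event_id
        if kind == 'completed' and status and _finish_reason is None:
            _finish_reason = status

    assert (_id, _finish_reason) == ('resp-123', 'completed')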

tests/models/test_openai.py

Lines changed: 96 additions & 8 deletions

@@ -147,13 +147,18 @@ def get_mock_chat_completion_kwargs(async_open_ai: AsyncOpenAI) -> list[dict[str
 
 
 def completion_message(
-    message: ChatCompletionMessage, *, usage: CompletionUsage | None = None, logprobs: ChoiceLogprobs | None = None
+    message: ChatCompletionMessage,
+    *,
+    usage: CompletionUsage | None = None,
+    logprobs: ChoiceLogprobs | None = None,
+    response_id: str = '123',
+    finish_reason: str = 'stop'
 ) -> chat.ChatCompletion:
-    choices = [Choice(finish_reason='stop', index=0, message=message)]
+    choices = [Choice(finish_reason=finish_reason, index=0, message=message)]
     if logprobs:
-        choices = [Choice(finish_reason='stop', index=0, message=message, logprobs=logprobs)]
+        choices = [Choice(finish_reason=finish_reason, index=0, message=message, logprobs=logprobs)]
     return chat.ChatCompletion(
-        id='123',
+        id=response_id,
         choices=choices,
         created=1704067200,  # 2024-01-01
         model='gpt-4o-123',
@@ -189,6 +194,8 @@ async def test_request_simple_success(allow_model_requests: None):
             model_name='gpt-4o-123',
             timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
             vendor_id='123',
+            id='123',
+            finish_reason='stop',
         ),
         ModelRequest(parts=[UserPromptPart(content='hello', timestamp=IsNow(tz=timezone.utc))]),
         ModelResponse(
@@ -197,6 +204,8 @@ async def test_request_simple_success(allow_model_requests: None):
             model_name='gpt-4o-123',
             timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
             vendor_id='123',
+            id='123',
+            finish_reason='stop',
         ),
     ]
 )
@@ -234,6 +243,36 @@ async def test_request_simple_usage(allow_model_requests: None):
     assert result.usage() == snapshot(Usage(requests=1, request_tokens=2, response_tokens=1, total_tokens=3))
 
 
+async def test_id_and_finish_reason_fields(allow_model_requests: None):
+    """Test that id and finish_reason fields are properly populated in ModelResponse."""
+    # Test with different finish reasons
+    test_cases = [
+        ('stop', 'response-id-1'),
+        ('length', 'response-id-2'),
+        ('tool_calls', 'response-id-3'),
+    ]
+
+    for finish_reason, response_id in test_cases:
+        c = completion_message(
+            ChatCompletionMessage(content='test response', role='assistant'),
+            response_id=response_id,
+            finish_reason=finish_reason,
+        )
+        mock_client = MockOpenAI.create_mock(c)
+        m = OpenAIModel('gpt-4o', provider=OpenAIProvider(openai_client=mock_client))
+        agent = Agent(m)
+
+        result = await agent.run('test')
+        assert result.output == 'test response'
+
+        # Check that the ModelResponse contains the correct id and finish_reason
+        messages = result.all_messages()
+        model_response = messages[1]  # Second message should be the model response
+        assert isinstance(model_response, ModelResponse)
+        assert model_response.id == response_id
+        assert model_response.finish_reason == finish_reason
+
+
 async def test_request_structured_response(allow_model_requests: None):
     c = completion_message(
         ChatCompletionMessage(
@@ -420,9 +459,9 @@ async def get_location(loc_name: str) -> str:
 FinishReason = Literal['stop', 'length', 'tool_calls', 'content_filter', 'function_call']
 
 
-def chunk(delta: list[ChoiceDelta], finish_reason: FinishReason | None = None) -> chat.ChatCompletionChunk:
+def chunk(delta: list[ChoiceDelta], finish_reason: FinishReason | None = None, chunk_id: str = 'x') -> chat.ChatCompletionChunk:
     return chat.ChatCompletionChunk(
-        id='x',
+        id=chunk_id,
         choices=[
             ChunkChoice(index=index, delta=delta, finish_reason=finish_reason) for index, delta in enumerate(delta)
         ],
@@ -433,8 +472,8 @@ def chunk(delta: list[ChoiceDelta], finish_reason: FinishReason | None = None) -
     )
 
 
-def text_chunk(text: str, finish_reason: FinishReason | None = None) -> chat.ChatCompletionChunk:
-    return chunk([ChoiceDelta(content=text, role='assistant')], finish_reason=finish_reason)
+def text_chunk(text: str, finish_reason: FinishReason | None = None, chunk_id: str = 'x') -> chat.ChatCompletionChunk:
+    return chunk([ChoiceDelta(content=text, role='assistant')], finish_reason=finish_reason, chunk_id=chunk_id)
 
 
 async def test_stream_text(allow_model_requests: None):
@@ -550,6 +589,55 @@ async def test_stream_structured_finish_reason(allow_model_requests: None):
     assert result.is_complete
 
 
+async def test_stream_id_and_finish_reason_fields(allow_model_requests: None):
+    """Test that streaming responses properly track id and finish_reason fields."""
+    # Test streaming text response
+    stream = [
+        text_chunk('hello ', chunk_id='stream-response-123'),
+        text_chunk('world', chunk_id='stream-response-123'),
+        text_chunk('!', finish_reason='stop', chunk_id='stream-response-123'),
+    ]
+    mock_client = MockOpenAI.create_mock_stream(stream)
+    m = OpenAIModel('gpt-4o', provider=OpenAIProvider(openai_client=mock_client))
+    agent = Agent(m)
+
+    async with agent.run_stream('test') as result:
+        assert not result.is_complete
+        text_chunks = [c async for c in result.stream_text(debounce_by=None)]
+        assert text_chunks == ['hello ', 'hello world', 'hello world!']
+        assert result.is_complete
+
+    # Get the final messages and check the ModelResponse
+    messages = result.all_messages()
+    model_response = messages[1]  # Second message should be the model response
+    assert isinstance(model_response, ModelResponse)
+    assert model_response.id == 'stream-response-123'
+    assert model_response.finish_reason == 'stop'
+
+    # Test streaming with structured output and different finish reason
+    stream = [
+        struc_chunk('final_result', None),
+        chunk([ChoiceDelta(tool_calls=[ChoiceDeltaToolCall(index=0, function=ChoiceDeltaToolCallFunction(name=None, arguments='{"first": "Test"'))])], chunk_id='struct-response-456'),
+        chunk([ChoiceDelta(tool_calls=[ChoiceDeltaToolCall(index=0, function=ChoiceDeltaToolCallFunction(name=None, arguments='}'))])], finish_reason='length', chunk_id='struct-response-456'),
+    ]
+    mock_client = MockOpenAI.create_mock_stream(stream)
+    m = OpenAIModel('gpt-4o', provider=OpenAIProvider(openai_client=mock_client))
+    agent = Agent(m, output_type=MyTypedDict)
+
+    async with agent.run_stream('test') as result:
+        assert not result.is_complete
+        structured_chunks = [dict(c) async for c in result.stream(debounce_by=None)]
+        assert structured_chunks == [{'first': 'Test'}, {'first': 'Test'}]
+        assert result.is_complete
+
+    # Get the final messages and check the ModelResponse
+    messages = result.all_messages()
+    model_response = messages[1]  # Second message should be the model response
+    assert isinstance(model_response, ModelResponse)
+    assert model_response.id == 'struct-response-456'
+    assert model_response.finish_reason == 'length'
+
+
 async def test_stream_native_output(allow_model_requests: None):
     stream = [
         chunk([]),
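
Beyond the OpenTelemetry attributes, the new metadata is readable straight off a run result, which is what these tests exercise through the mock client. A hedged end-to-end sketch against the real API (assumes an OpenAI API key is configured; the printed values are provider-assigned):

    from pydantic_ai import Agent
    from pydantic_ai.messages import ModelResponse

    agent = Agent('openai:gpt-4o')
    result = agent.run_sync('hello')

    response = result.all_messages()[-1]
    assert isinstance(response, ModelResponse)
    print(response.id)             # e.g. 'chatcmpl-...'
    print(response.finish_reason)  # e.g. 'stop'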
