
Commit 0de174f

Add custom reasoning field support to OpenAI model profiles (#3536)
1 parent c1f971d commit 0de174f

File tree: 7 files changed, +188 -32 lines

7 files changed

+188
-32
lines changed

docs/thinking.md

Lines changed: 4 additions & 0 deletions

```diff
@@ -11,6 +11,10 @@ See the sections below for how to enable thinking for each provider.
 When using the [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel], text output inside `<think>` tags is converted to [`ThinkingPart`][pydantic_ai.messages.ThinkingPart] objects.
 You can customize the tags using the [`thinking_tags`][pydantic_ai.profiles.ModelProfile.thinking_tags] field on the [model profile](models/openai.md#model-profile).
 
+Some [OpenAI-compatible model providers](models/openai.md#openai-compatible-models) might also support native thinking parts that are not delimited by tags; instead, they are sent and received as separate, custom fields in the API. Typically, if you call the model via the `<provider>:<model>` shorthand, Pydantic AI handles this for you. Nonetheless, you can also configure the field yourself with [`openai_chat_thinking_field`][pydantic_ai.profiles.openai.OpenAIModelProfile.openai_chat_thinking_field].
+
+If your provider recommends sending these custom fields back unchanged, for caching or interleaved-thinking benefits, you can achieve this with [`openai_chat_send_back_thinking_parts`][pydantic_ai.profiles.openai.OpenAIModelProfile.openai_chat_send_back_thinking_parts].
+
 ### OpenAI Responses
 
 The [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel] can generate native thinking parts.
```
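
As a usage note on the two new profile fields, here is a minimal sketch (not from this commit) of configuring them explicitly for a self-hosted OpenAI-compatible server; the model name and `base_url` are illustrative placeholders:

```python
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.profiles.openai import OpenAIModelProfile
from pydantic_ai.providers.openai import OpenAIProvider

model = OpenAIChatModel(
    'my-reasoning-model',  # hypothetical model name
    provider=OpenAIProvider(base_url='http://localhost:8000/v1', api_key='unused'),
    profile=OpenAIModelProfile(
        # Field on the Chat Completions response carrying thinking content:
        openai_chat_thinking_field='reasoning_content',
        # Send thinking parts back to the provider in that same field:
        openai_chat_send_back_thinking_parts='field',
    ),
)
agent = Agent(model)
```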

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 50 additions & 30 deletions

```diff
@@ -637,20 +637,28 @@ def _process_thinking(self, message: chat.ChatCompletionMessage) -> list[ThinkingPart]
 
         This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
         """
+        profile = OpenAIModelProfile.from_profile(self.profile)
+        custom_field = profile.openai_chat_thinking_field
         items: list[ThinkingPart] = []
 
-        # The `reasoning_content` field is only present in DeepSeek models.
+        # Prefer the configured custom reasoning field, if present in the profile.
+        # Fall back to built-in fields if no custom field result was found.
+
+        # The `reasoning_content` field is typically present in DeepSeek and Moonshot models.
         # https://api-docs.deepseek.com/guides/reasoning_model
-        if reasoning_content := getattr(message, 'reasoning_content', None):
-            items.append(ThinkingPart(id='reasoning_content', content=reasoning_content, provider_name=self.system))
 
-        # The `reasoning` field is only present in gpt-oss via Ollama and OpenRouter.
+        # The `reasoning` field is typically present in gpt-oss via Ollama and OpenRouter.
         # - https://cookbook.openai.com/articles/gpt-oss/handle-raw-cot#chat-completions-api
         # - https://openrouter.ai/docs/use-cases/reasoning-tokens#basic-usage-with-reasoning-tokens
-        if reasoning := getattr(message, 'reasoning', None):
-            items.append(ThinkingPart(id='reasoning', content=reasoning, provider_name=self.system))
+        for field_name in (custom_field, 'reasoning', 'reasoning_content'):
+            if not field_name:
+                continue
+            reasoning: str | None = getattr(message, field_name, None)
+            if reasoning:  # pragma: no branch
+                items.append(ThinkingPart(id=field_name, content=reasoning, provider_name=self.system))
+                return items
 
-        return items
+        return items or None
 
     async def _process_streamed_response(
         self, response: AsyncStream[ChatCompletionChunk], model_request_parameters: ModelRequestParameters
```
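
The loop above encodes a strict precedence: the profile's custom field is tried first, then `reasoning`, then `reasoning_content`, and extraction stops at the first field that actually carries content. A self-contained sketch of that precedence (hypothetical helper, not library code):

```python
from types import SimpleNamespace

def pick_reasoning(message: object, custom_field: str | None) -> tuple[str, str] | None:
    """First-match-wins lookup mirroring _process_thinking's loop."""
    for field_name in (custom_field, 'reasoning', 'reasoning_content'):
        if not field_name:
            continue
        content = getattr(message, field_name, None)
        if content:
            return field_name, content
    return None

msg = SimpleNamespace(reasoning_content='chain of thought')
assert pick_reasoning(msg, None) == ('reasoning_content', 'chain of thought')
# A configured custom field takes priority when both are present:
msg2 = SimpleNamespace(thinking='custom', reasoning='builtin')
assert pick_reasoning(msg2, 'thinking') == ('thinking', 'custom')
```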
```diff
@@ -726,6 +734,7 @@ class _MapModelResponseContext:
     _model: OpenAIChatModel
 
     texts: list[str] = field(default_factory=list)
+    thinkings: list[str] = field(default_factory=list)
     tool_calls: list[ChatCompletionMessageFunctionToolCallParam] = field(default_factory=list)
 
     def map_assistant_message(self, message: ModelResponse) -> chat.ChatCompletionAssistantMessageParam:
@@ -753,10 +762,15 @@ def _into_message_param(self) -> chat.ChatCompletionAssistantMessageParam:
         Returns:
             An OpenAI `ChatCompletionAssistantMessageParam` object representing the assistant's response.
         """
+        profile = OpenAIModelProfile.from_profile(self._model.profile)
         message_param = chat.ChatCompletionAssistantMessageParam(role='assistant')
+        # Note: model responses from this model should only have one text item, so the following
+        # shouldn't merge multiple texts into one unless you switch models between runs:
+        if profile.openai_chat_send_back_thinking_parts == 'field' and self.thinkings:
+            field = profile.openai_chat_thinking_field
+            if field:  # pragma: no branch (handled by profile validation)
+                message_param[field] = '\n\n'.join(self.thinkings)
         if self.texts:
-            # Note: model responses from this model should only have one text item, so the following
-            # shouldn't merge multiple texts into one unless you switch models between runs:
             message_param['content'] = '\n\n'.join(self.texts)
         else:
             message_param['content'] = None
```
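
For reference, in `'field'` mode the assistant message param produced here carries the accumulated thinking under the configured key; the shape below is taken from the snapshot in the new tests, with `reasoning_content` as the configured field:

```python
# Assistant message sent back in 'field' mode, per the test snapshot below;
# multiple thinking parts would be joined with blank lines.
message_param = {
    'role': 'assistant',
    'reasoning_content': 'reasoning',
    'content': 'response',
}
```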
```diff
@@ -778,11 +792,13 @@ def _map_response_thinking_part(self, item: ThinkingPart) -> None:
         This method serves as a hook that can be overridden by subclasses
         to implement custom logic for handling thinking parts.
         """
-        # NOTE: DeepSeek `reasoning_content` field should NOT be sent back per https://api-docs.deepseek.com/guides/reasoning_model,
-        # but we currently just send it in `<think>` tags anyway as we don't want DeepSeek-specific checks here.
-        # If you need this changed, please file an issue.
-        start_tag, end_tag = self._model.profile.thinking_tags
-        self.texts.append('\n'.join([start_tag, item.content, end_tag]))
+        profile = OpenAIModelProfile.from_profile(self._model.profile)
+        include_method = profile.openai_chat_send_back_thinking_parts
+        if include_method == 'tags':
+            start_tag, end_tag = self._model.profile.thinking_tags
+            self.texts.append('\n'.join([start_tag, item.content, end_tag]))
+        elif include_method == 'field':
+            self.thinkings.append(item.content)
 
     def _map_response_tool_call_part(self, item: ToolCallPart) -> None:
         """Maps a `ToolCallPart` to the response context.
```
```diff
@@ -1890,26 +1906,30 @@ def _map_thinking_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[
 
         This method may be overridden by subclasses of `OpenAIStreamResponse` to customize the mapping.
         """
-        # The `reasoning_content` field is only present in DeepSeek models.
+        profile = OpenAIModelProfile.from_profile(self._model_profile)
+        custom_field = profile.openai_chat_thinking_field
+
+        # Prefer the configured custom reasoning field, if present in the profile.
+        # Fall back to built-in fields if no custom field result was found.
+
+        # The `reasoning_content` field is typically present in DeepSeek and Moonshot models.
         # https://api-docs.deepseek.com/guides/reasoning_model
-        if reasoning_content := getattr(choice.delta, 'reasoning_content', None):
-            yield self._parts_manager.handle_thinking_delta(
-                vendor_part_id='reasoning_content',
-                id='reasoning_content',
-                content=reasoning_content,
-                provider_name=self.provider_name,
-            )
 
-        # The `reasoning` field is only present in gpt-oss via Ollama and OpenRouter.
+        # The `reasoning` field is typically present in gpt-oss via Ollama and OpenRouter.
         # - https://cookbook.openai.com/articles/gpt-oss/handle-raw-cot#chat-completions-api
         # - https://openrouter.ai/docs/use-cases/reasoning-tokens#basic-usage-with-reasoning-tokens
-        if reasoning := getattr(choice.delta, 'reasoning', None):  # pragma: no cover
-            yield self._parts_manager.handle_thinking_delta(
-                vendor_part_id='reasoning',
-                id='reasoning',
-                content=reasoning,
-                provider_name=self.provider_name,
-            )
+        for field_name in (custom_field, 'reasoning', 'reasoning_content'):
+            if not field_name:
+                continue
+            reasoning: str | None = getattr(choice.delta, field_name, None)
+            if reasoning:  # pragma: no branch
+                yield self._parts_manager.handle_thinking_delta(
+                    vendor_part_id=field_name,
+                    id=field_name,
+                    content=reasoning,
+                    provider_name=self.provider_name,
+                )
+                break
 
     def _map_text_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[ModelResponseStreamEvent]:
         """Hook that maps text delta content to events.
```

pydantic_ai_slim/pydantic_ai/profiles/openai.py

Lines changed: 27 additions & 0 deletions

```diff
@@ -7,6 +7,7 @@
 from typing import Any, Literal
 
 from .._json_schema import JsonSchema, JsonSchemaTransformer
+from ..exceptions import UserError
 from . import ModelProfile
 
 OpenAISystemPromptRole = Literal['system', 'developer', 'user']
@@ -19,6 +20,27 @@ class OpenAIModelProfile(ModelProfile):
     ALL FIELDS MUST BE `openai_` PREFIXED SO YOU CAN MERGE THEM WITH OTHER MODELS.
     """
 
+    openai_chat_thinking_field: str | None = None
+    """Non-standard field name used by some providers for model thinking content in Chat Completions API responses.
+
+    Plenty of providers use custom field names for thinking content: Ollama and newer versions of vLLM use `reasoning`,
+    while DeepSeek, older vLLM versions, and some others use `reasoning_content`.
+
+    Note that the thinking field configured here is currently limited to `str` content.
+
+    If `openai_chat_send_back_thinking_parts` is set to `'field'`, this field must be set to a non-None value."""
+
+    openai_chat_send_back_thinking_parts: Literal['tags', 'field', False] = 'tags'
+    """How thinking content is included in requests sent back to the model.
+
+    This can be:
+
+    * `'tags'` (default): The thinking content is included in the main `content` field, enclosed within the thinking
+      tags specified by the `thinking_tags` profile option.
+    * `'field'`: The thinking content is included in a separate field specified by `openai_chat_thinking_field`.
+    * `False`: No thinking content is sent in the request.
+
+    Defaults to `'tags'` for backward compatibility."""
+
     openai_supports_strict_tool_definition: bool = True
     """This can be set by a provider or user if the OpenAI-"compatible" API doesn't support strict tool definitions."""
@@ -58,6 +80,11 @@ def __post_init__(self):  # pragma: no cover
             'Use `openai_unsupported_model_settings` instead.',
             DeprecationWarning,
         )
+        if self.openai_chat_send_back_thinking_parts == 'field' and not self.openai_chat_thinking_field:
+            raise UserError(
+                'If `openai_chat_send_back_thinking_parts` is "field", '
+                '`openai_chat_thinking_field` must be set to a non-None value.'
+            )
 
 
 def openai_model_profile(model_name: str) -> ModelProfile:
```
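
The `__post_init__` check above makes the invalid combination fail fast at profile construction time; roughly, a caller would see the following (a sketch, assuming `__post_init__` runs on dataclass construction as it does for `ModelProfile`):

```python
from pydantic_ai.exceptions import UserError
from pydantic_ai.profiles.openai import OpenAIModelProfile

try:
    OpenAIModelProfile(openai_chat_send_back_thinking_parts='field')  # no field configured
except UserError as exc:
    print(exc)
    # If `openai_chat_send_back_thinking_parts` is "field",
    # `openai_chat_thinking_field` must be set to a non-None value.
```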

pydantic_ai_slim/pydantic_ai/providers/deepseek.py

Lines changed: 5 additions & 1 deletion

```diff
@@ -45,7 +45,11 @@ def model_profile(self, model_name: str) -> ModelProfile | None:
         # This was not the case when using a DeepSeek model with another model class (e.g. BedrockConverseModel or GroqModel),
         # so we won't do this in `deepseek_model_profile` unless we learn it's always needed.
         return OpenAIModelProfile(
-            json_schema_transformer=OpenAIJsonSchemaTransformer, supports_json_object_output=True
+            json_schema_transformer=OpenAIJsonSchemaTransformer,
+            supports_json_object_output=True,
+            openai_chat_thinking_field='reasoning_content',
+            # Starting from DeepSeek v3.2, DeepSeek requires sending thinking parts for optimal agentic performance.
+            openai_chat_send_back_thinking_parts='field',
         ).update(profile)
 
     @overload
```
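
Because the DeepSeek provider now bakes these settings into its profile, users of the `deepseek:` shorthand get the behavior with no extra configuration; a sketch (model name illustrative, requires a DeepSeek API key):

```python
from pydantic_ai import Agent

# The DeepSeekProvider profile above is applied automatically: thinking
# arrives in `reasoning_content` and is sent back in the same field.
agent = Agent('deepseek:deepseek-reasoner')
```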

pydantic_ai_slim/pydantic_ai/providers/moonshotai.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -57,6 +57,8 @@ def model_profile(self, model_name: str) -> ModelProfile | None:
             json_schema_transformer=OpenAIJsonSchemaTransformer,
             openai_supports_tool_choice_required=False,
             supports_json_object_output=True,
+            openai_chat_thinking_field='reasoning_content',
+            openai_chat_send_back_thinking_parts='field',
         ).update(profile)
 
     @overload
```

pydantic_ai_slim/pydantic_ai/providers/ollama.py

Lines changed: 5 additions & 1 deletion

```diff
@@ -62,7 +62,11 @@ def model_profile(self, model_name: str) -> ModelProfile | None:
 
         # As OllamaProvider is always used with OpenAIChatModel, which used to unconditionally use OpenAIJsonSchemaTransformer,
         # we need to maintain that behavior unless json_schema_transformer is set explicitly
-        return OpenAIModelProfile(json_schema_transformer=OpenAIJsonSchemaTransformer).update(profile)
+        return OpenAIModelProfile(
+            json_schema_transformer=OpenAIJsonSchemaTransformer,
+            openai_chat_thinking_field='reasoning',
+            openai_chat_send_back_thinking_parts='tags',
+        ).update(profile)
 
     def __init__(
         self,
```

tests/models/test_openai.py

Lines changed: 95 additions & 0 deletions

```diff
@@ -3201,3 +3201,98 @@ async def test_cache_point_filtering_responses_model():
     assert len(msg['content']) == 2
     assert msg['content'][0]['text'] == 'text before'  # type: ignore[reportUnknownArgumentType]
     assert msg['content'][1]['text'] == 'text after'  # type: ignore[reportUnknownArgumentType]
+
+
+async def test_openai_custom_reasoning_field_sending_back_in_thinking_tags(allow_model_requests: None):
+    c = completion_message(
+        ChatCompletionMessage.model_construct(content='response', reasoning_content='reasoning', role='assistant')
+    )
+    m = OpenAIChatModel(
+        'foobar',
+        provider=OpenAIProvider(openai_client=MockOpenAI.create_mock(c)),
+        profile=OpenAIModelProfile(
+            openai_chat_thinking_field='reasoning_content',
+            openai_chat_send_back_thinking_parts='tags',
+        ),
+    )
+    settings = ModelSettings()
+    params = ModelRequestParameters()
+    resp = await m.request(messages=[], model_settings=settings, model_request_parameters=params)
+    assert m._map_model_response(resp) == snapshot(  # type: ignore[reportPrivateUsage]
+        {
+            'role': 'assistant',
+            'content': """\
+<think>
+reasoning
+</think>
+
+response\
+""",
+        }
+    )
+
+
+async def test_openai_custom_reasoning_field_sending_back_in_custom_field(allow_model_requests: None):
+    c = completion_message(
+        ChatCompletionMessage.model_construct(content='response', reasoning_content='reasoning', role='assistant')
+    )
+    m = OpenAIChatModel(
+        'foobar',
+        provider=OpenAIProvider(openai_client=MockOpenAI.create_mock(c)),
+        profile=OpenAIModelProfile(
+            openai_chat_thinking_field='reasoning_content',
+            openai_chat_send_back_thinking_parts='field',
+        ),
+    )
+    settings = ModelSettings()
+    params = ModelRequestParameters()
+    resp = await m.request(messages=[], model_settings=settings, model_request_parameters=params)
+    assert m._map_model_response(resp) == snapshot(  # type: ignore[reportPrivateUsage]
+        {'role': 'assistant', 'reasoning_content': 'reasoning', 'content': 'response'}
+    )
+
+
+async def test_openai_custom_reasoning_field_not_sending(allow_model_requests: None):
+    c = completion_message(
+        ChatCompletionMessage.model_construct(content='response', reasoning_content='reasoning', role='assistant')
+    )
+    m = OpenAIChatModel(
+        'foobar',
+        provider=OpenAIProvider(openai_client=MockOpenAI.create_mock(c)),
+        profile=OpenAIModelProfile(
+            openai_chat_thinking_field='reasoning_content',
+            openai_chat_send_back_thinking_parts=False,
+        ),
+    )
+    settings = ModelSettings()
+    params = ModelRequestParameters()
+    resp = await m.request(messages=[], model_settings=settings, model_request_parameters=params)
+    assert m._map_model_response(resp) == snapshot(  # type: ignore[reportPrivateUsage]
+        {'role': 'assistant', 'content': 'response'}
+    )
+
+
+async def test_openai_reasoning_in_thinking_tags(allow_model_requests: None):
+    c = completion_message(
+        ChatCompletionMessage.model_construct(content='<think>reasoning</think>response', role='assistant')
+    )
+    m = OpenAIChatModel(
+        'foobar',
+        provider=OpenAIProvider(openai_client=MockOpenAI.create_mock(c)),
+        profile=OpenAIModelProfile(openai_chat_send_back_thinking_parts='tags'),
+    )
+    settings = ModelSettings()
+    params = ModelRequestParameters()
+    resp = await m.request(messages=[], model_settings=settings, model_request_parameters=params)
+    assert m._map_model_response(resp) == snapshot(  # type: ignore[reportPrivateUsage]
+        {
+            'role': 'assistant',
+            'content': """\
+<think>
+reasoning
+</think>
+
+response\
+""",
+        }
+    )
```
