Skip to content

Commit 7ad6ccd

Browse files
committed
Gemini 3 Pro
1 parent 2981b17 commit 7ad6ccd

File tree

8 files changed

+255
-32
lines changed

8 files changed

+255
-32
lines changed

docs/models/google.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,22 +214,22 @@ from pydantic_ai.models.google import GoogleModel, GoogleModelSettings
214214
settings = GoogleModelSettings(
215215
temperature=0.2,
216216
max_tokens=1024,
217-
google_thinking_config={'thinking_budget': 2048},
217+
google_thinking_config={'thinking_level': 'low'},
218218
google_safety_settings=[
219219
{
220220
'category': HarmCategory.HARM_CATEGORY_HATE_SPEECH,
221221
'threshold': HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
222222
}
223223
]
224224
)
225-
model = GoogleModel('gemini-2.5-flash')
225+
model = GoogleModel('gemini-2.5-pro')
226226
agent = Agent(model, model_settings=settings)
227227
...
228228
```
229229

230230
### Disable thinking
231231

232-
You can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
232+
On models older than Gemini 2.5 Pro, you can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
233233

234234
```python
235235
from pydantic_ai import Agent

pydantic_ai_slim/pydantic_ai/models/google.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
VideoUrl,
3939
)
4040
from ..profiles import ModelProfileSpec
41+
from ..profiles.google import GoogleModelProfile
4142
from ..providers import Provider, infer_provider
4243
from ..settings import ModelSettings
4344
from ..tools import ToolDefinition
@@ -228,12 +229,17 @@ def system(self) -> str:
228229
def prepare_request(
229230
self, model_settings: ModelSettings | None, model_request_parameters: ModelRequestParameters
230231
) -> tuple[ModelSettings | None, ModelRequestParameters]:
232+
supports_native_output_with_builtin_tools = GoogleModelProfile.from_profile(
233+
self.profile
234+
).google_supports_native_output_with_builtin_tools
231235
if model_request_parameters.builtin_tools and model_request_parameters.output_tools:
232236
if model_request_parameters.output_mode == 'auto':
233-
model_request_parameters = replace(model_request_parameters, output_mode='prompted')
237+
output_mode = 'native' if supports_native_output_with_builtin_tools else 'prompted'
238+
model_request_parameters = replace(model_request_parameters, output_mode=output_mode)
234239
else:
240+
output_mode = 'NativeOutput' if supports_native_output_with_builtin_tools else 'PromptedOutput'
235241
raise UserError(
236-
'Google does not support output tools and built-in tools at the same time. Use `output_type=PromptedOutput(...)` instead.'
242+
f'Google does not support output tools and built-in tools at the same time. Use `output_type={output_mode}(...)` instead.'
237243
)
238244
return super().prepare_request(model_settings, model_request_parameters)
239245

@@ -409,9 +415,9 @@ async def _build_content_and_config(
409415
response_mime_type = None
410416
response_schema = None
411417
if model_request_parameters.output_mode == 'native':
412-
if tools:
418+
if model_request_parameters.function_tools:
413419
raise UserError(
414-
'Google does not support `NativeOutput` and tools at the same time. Use `output_type=ToolOutput(...)` instead.'
420+
'Google does not support `NativeOutput` and function tools at the same time. Use `output_type=ToolOutput(...)` instead.'
415421
)
416422
response_mime_type = 'application/json'
417423
output_object = model_request_parameters.output_object
@@ -675,22 +681,19 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
675681
for part in parts:
676682
if part.thought_signature:
677683
signature = base64.b64encode(part.thought_signature).decode('utf-8')
684+
# Attach signature to most recent thinking part, if there was one
678685
yield self._parts_manager.handle_thinking_delta(
679-
vendor_part_id='thinking',
686+
vendor_part_id=None,
680687
signature=signature,
681688
provider_name=self.provider_name,
682689
)
683690

684691
if part.text is not None:
685692
if len(part.text) > 0:
686693
if part.thought:
687-
yield self._parts_manager.handle_thinking_delta(
688-
vendor_part_id='thinking', content=part.text
689-
)
694+
yield self._parts_manager.handle_thinking_delta(vendor_part_id=None, content=part.text)
690695
else:
691-
maybe_event = self._parts_manager.handle_text_delta(
692-
vendor_part_id='content', content=part.text
693-
)
696+
maybe_event = self._parts_manager.handle_text_delta(vendor_part_id=None, content=part.text)
694697
if maybe_event is not None: # pragma: no branch
695698
yield maybe_event
696699
elif part.function_call:
@@ -749,6 +752,7 @@ def timestamp(self) -> datetime:
749752
def _content_model_response(m: ModelResponse, provider_name: str) -> ContentDict: # noqa: C901
750753
parts: list[PartDict] = []
751754
thought_signature: bytes | None = None
755+
function_call_requires_signature: bool = True
752756
for item in m.parts:
753757
part: PartDict = {}
754758
if thought_signature:
@@ -758,6 +762,15 @@ def _content_model_response(m: ModelResponse, provider_name: str) -> ContentDict
758762
if isinstance(item, ToolCallPart):
759763
function_call = FunctionCallDict(name=item.tool_name, args=item.args_as_dict(), id=item.tool_call_id)
760764
part['function_call'] = function_call
765+
if function_call_requires_signature and not part.get('thought_signature'):
766+
# Per https://ai.google.dev/gemini-api/docs/gemini-3?thinking=high#rest_2:
767+
# > If you are transferring a conversation trace from another model (e.g., Gemini 2.5) or injecting
768+
# > a custom function call that was not generated by Gemini 3, you will not have a valid signature.
769+
# > To bypass strict validation in these specific scenarios, populate the field with this specific
770+
# > dummy string: "thoughtSignature": "context_engineering_is_the_way_to_go"
771+
part['thought_signature'] = b'context_engineering_is_the_way_to_go'
772+
# Only the first function call requires a signature
773+
function_call_requires_signature = False
761774
elif isinstance(item, TextPart):
762775
part['text'] = item.content
763776
elif isinstance(item, ThinkingPart):

pydantic_ai_slim/pydantic_ai/profiles/google.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,37 @@
11
from __future__ import annotations as _annotations
22

33
import warnings
4+
from dataclasses import dataclass
45

56
from pydantic_ai.exceptions import UserError
67

78
from .._json_schema import JsonSchema, JsonSchemaTransformer
89
from . import ModelProfile
910

1011

12+
@dataclass(kw_only=True)
13+
class GoogleModelProfile(ModelProfile):
14+
"""Profile for models used with `GoogleModel`.
15+
16+
ALL FIELDS MUST BE `google_` PREFIXED SO YOU CAN MERGE THEM WITH OTHER MODELS.
17+
"""
18+
19+
google_supports_native_output_with_builtin_tools: bool = False
20+
"""Whether the model supports native output with builtin tools.
21+
See https://ai.google.dev/gemini-api/docs/structured-output?example=recipe#structured_outputs_with_tools"""
22+
23+
1124
def google_model_profile(model_name: str) -> ModelProfile | None:
1225
"""Get the model profile for a Google model."""
1326
is_image_model = 'image' in model_name
14-
return ModelProfile(
27+
is_3_or_newer = 'gemini-3' in model_name
28+
return GoogleModelProfile(
1529
json_schema_transformer=GoogleJsonSchemaTransformer,
1630
supports_image_output=is_image_model,
1731
supports_json_schema_output=not is_image_model,
1832
supports_json_object_output=not is_image_model,
1933
supports_tools=not is_image_model,
34+
google_supports_native_output_with_builtin_tools=is_3_or_newer,
2035
)
2136

2237

pydantic_ai_slim/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ logfire = ["logfire[httpx]>=3.14.1"]
7070
openai = ["openai>=1.107.2"]
7171
cohere = ["cohere>=5.18.0; platform_system != 'Emscripten'"]
7272
vertexai = ["google-auth>=2.36.0", "requests>=2.32.2"]
73-
google = ["google-genai>=1.50.1"]
73+
google = ["google-genai>=1.51.0"]
7474
anthropic = ["anthropic>=0.70.0"]
7575
groq = ["groq>=0.25.0"]
7676
mistral = ["mistralai>=1.9.10"]

tests/models/cassettes/test_google/test_google_builtin_tools_with_other_tools.yaml

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ interactions:
88
connection:
99
- keep-alive
1010
content-length:
11-
- '526'
11+
- '560'
1212
content-type:
1313
- application/json
1414
host:
@@ -19,13 +19,16 @@ interactions:
1919
- parts:
2020
- text: What is the largest city in Mexico?
2121
role: user
22-
generationConfig: {}
22+
generationConfig:
23+
responseModalities:
24+
- TEXT
2325
systemInstruction:
2426
parts:
25-
- text: |-
27+
- text: |2
28+
2629
Always respond with a JSON object that's compatible with this schema:
2730
28-
{"properties": {"city": {"type": "string"}, "country": {"type": "string"}}, "required": ["city", "country"], "title": "CityLocation", "type": "object"}
31+
{"properties": {"city": {"type": "string"}, "country": {"type": "string"}}, "required": ["city", "country"], "type": "object", "title": "CityLocation"}
2932
3033
Don't include any text or Markdown fencing before or after.
3134
role: user
@@ -37,11 +40,11 @@ interactions:
3740
alt-svc:
3841
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
3942
content-length:
40-
- '626'
43+
- '595'
4144
content-type:
4245
- application/json; charset=UTF-8
4346
server-timing:
44-
- gfet4t7; dur=780
47+
- gfet4t7; dur=804
4548
transfer-encoding:
4649
- chunked
4750
vary:
@@ -58,15 +61,14 @@ interactions:
5861
groundingMetadata: {}
5962
index: 0
6063
modelVersion: gemini-2.5-flash
61-
responseId: 6Xq3aPnXNtqKqtsP8ZuDyAc
64+
responseId: BegcaeXaA7qgz7IP_qOzwAc
6265
usageMetadata:
6366
candidatesTokenCount: 13
64-
promptTokenCount: 83
67+
promptTokenCount: 85
6568
promptTokensDetails:
6669
- modality: TEXT
67-
tokenCount: 83
68-
thoughtsTokenCount: 33
69-
totalTokenCount: 129
70+
tokenCount: 85
71+
totalTokenCount: 98
7072
status:
7173
code: 200
7274
message: OK
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
interactions:
2+
- request:
3+
headers:
4+
accept:
5+
- '*/*'
6+
accept-encoding:
7+
- gzip, deflate
8+
connection:
9+
- keep-alive
10+
content-length:
11+
- '430'
12+
content-type:
13+
- application/json
14+
host:
15+
- generativelanguage.googleapis.com
16+
method: POST
17+
parsed_body:
18+
contents:
19+
- parts:
20+
- text: What is the largest city in Mexico?
21+
role: user
22+
generationConfig:
23+
responseMimeType: application/json
24+
responseModalities:
25+
- TEXT
26+
responseSchema:
27+
properties:
28+
city:
29+
type: STRING
30+
country:
31+
type: STRING
32+
property_ordering:
33+
- city
34+
- country
35+
required:
36+
- city
37+
- country
38+
title: CityLocation
39+
type: OBJECT
40+
tools:
41+
- urlContext: {}
42+
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-preview:generateContent
43+
response:
44+
headers:
45+
alt-svc:
46+
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
47+
content-length:
48+
- '1448'
49+
content-type:
50+
- application/json; charset=UTF-8
51+
server-timing:
52+
- gfet4t7; dur=2843
53+
transfer-encoding:
54+
- chunked
55+
vary:
56+
- Origin
57+
- X-Origin
58+
- Referer
59+
parsed_body:
60+
candidates:
61+
- content:
62+
parts:
63+
- text: "{\n \"city\": \"Mexico City\",\n \"country\": \"Mexico\"\n} "
64+
thoughtSignature: EtcECtQEAdHtim/CS2N3y71XJ9wjmj1QDm3Y7R7cun7pMFmxQC3zalFnbopV9EjBFsSRntkmA/QdmURQ7kc2e0K+uXmuzDnHk2h7mXyo9warZzg0cv8lOAyvcslmTZjg58ArKIf4hJ8o3f4qI0y36Pv4eDDO9EOya7C4resSA6qdQadCJRrZt7Jfms/KTZgVWpwm96hBmof1hTdkSjAEwKGxRsA3eWcTISBsECaVqAQw+bYFMT47O6M/29KnIhmxmojARpx43G8yV4pjKmgIjGxVmnS9TyHtEmU9iYr8LeREU7IXMUhoKp8alcNWFwqlxnbuOCwu0ar4IgxnPIk0Kfw9RoR00H+GW6WJaJfXPByyjPoR8ArmkDkG6fvKmRb+yG7S6Eq5ewHOHQIzWSZ+A4+Ngs4om04CpeSpDf0M7UlumQvzTyJE9ljkWbMcfEIL4Dv56Uj5dmbmNg71vnesDak1xSIu25EccJmhfptH18+vomIKd1EgEip+f1enoKiPN2rtk9biVdLgfAHjf5bpL5hAo40Q763cUs8nWRv/s/vYqGO/HL5+mZWheQMdg2hQmw6an0sIAWI+srpQMXz9PsLxSOc6H3yOPCOYkmG0yDtfuxe4X8HndoSmCF/C4Zu/1VmnWoZBhTFaPNyvlL1yL502Zp5qG/jYJ2gNIu78r89N33Yk3RVSrFWoNcB2z2DYY4EXCz8+1e1qyCPgQsNXVMFO2KO2CcmsssODEIDB0+d9ysiGuNW9Bc5dhW7Iy25s6mvHtQRb2S4a86kIJVP/yvUcwapiKk3slNY=
65+
role: model
66+
finishReason: STOP
67+
index: 0
68+
modelVersion: gemini-3-pro-preview
69+
responseId: I-scaZ7wFOiyqtsP0rvOoQc
70+
usageMetadata:
71+
candidatesTokenCount: 21
72+
promptTokenCount: 9
73+
promptTokensDetails:
74+
- modality: TEXT
75+
tokenCount: 9
76+
thoughtsTokenCount: 135
77+
totalTokenCount: 165
78+
status:
79+
code: 200
80+
message: OK
81+
- request:
82+
headers:
83+
accept:
84+
- '*/*'
85+
accept-encoding:
86+
- gzip, deflate
87+
connection:
88+
- keep-alive
89+
content-length:
90+
- '430'
91+
content-type:
92+
- application/json
93+
host:
94+
- generativelanguage.googleapis.com
95+
method: POST
96+
parsed_body:
97+
contents:
98+
- parts:
99+
- text: What is the largest city in Mexico?
100+
role: user
101+
generationConfig:
102+
responseMimeType: application/json
103+
responseModalities:
104+
- TEXT
105+
responseSchema:
106+
properties:
107+
city:
108+
type: STRING
109+
country:
110+
type: STRING
111+
property_ordering:
112+
- city
113+
- country
114+
required:
115+
- city
116+
- country
117+
title: CityLocation
118+
type: OBJECT
119+
tools:
120+
- urlContext: {}
121+
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-preview:generateContent
122+
response:
123+
headers:
124+
alt-svc:
125+
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
126+
content-length:
127+
- '1300'
128+
content-type:
129+
- application/json; charset=UTF-8
130+
server-timing:
131+
- gfet4t7; dur=2548
132+
transfer-encoding:
133+
- chunked
134+
vary:
135+
- Origin
136+
- X-Origin
137+
- Referer
138+
parsed_body:
139+
candidates:
140+
- content:
141+
parts:
142+
- text: "{\n \"city\": \"Mexico City\",\n \"country\": \"Mexico\"\n} "
143+
thoughtSignature: EugDCuUDAdHtim8d0XRoXBnV7jLdb4TtGjESPsOynIUtVoSXym1duR5bwzlaoUOkgu83FEscE1UlxByIXybHerIyzzvuCnBK+q3Z24cQ21mqWi0ITUcrix6I4oMTJuihgseowtOc45Z/P9gve+mTh1JlZPFQZRVuBXxxvFtGTeKYzl4R7yTndHRF2qJRcYLnl2EtaZmGDbvWyybdLgmcNrMrtKG8NEWF9yriL0GduayEPwOlPk8d2QpbMgV79PXGtvBQ7kmE1VpHL1Y7zdRsl2edVtlx+nwXnIZlim6QC+ff2lNxRBtqeyDxrESDbZuW4PTzBM1McHyg3HkR27zcxScs6JtMP1gNHxuVZCFkz1aP5uP0IyvqjFUR5LPfx1I/1eWL23C9TTkxxkaiyAIFnpq04ebWS/mcwKFpUxHRrRtK6Zvtxyb4/TmRwknx+T9U2PfPGLASLAxa/1G7cJh7HPpX4UTipM+6hNOJX5XjQo5FLHBsPyHzmFrVbyYFOT8pSqwDdqR+3QozY/y87GdKLgLTZjwO0UqPsAkO4lvnB6++NaGxeZWW7qsnH2gz+T9QAVT9BEq7pf67VFicOaP8MdDs3mII7D8vs7P4J+GqjoP2gTC4sIjwn8TiSR4fzjTNoHqkvfLOP9PZMAU=
144+
role: model
145+
finishReason: STOP
146+
index: 0
147+
modelVersion: gemini-3-pro-preview
148+
responseId: JuscaY35DYmymtkPr67luA0
149+
usageMetadata:
150+
candidatesTokenCount: 21
151+
promptTokenCount: 9
152+
promptTokensDetails:
153+
- modality: TEXT
154+
tokenCount: 9
155+
thoughtsTokenCount: 108
156+
totalTokenCount: 138
157+
status:
158+
code: 200
159+
message: OK
160+
version: 1

0 commit comments

Comments (0)