Skip to content

Commit 36fe1c9

Browse files
committed
Add support for Gemini 3 Pro Preview
1 parent 2981b17 commit 36fe1c9

File tree

13 files changed

+1188
-56
lines changed

13 files changed

+1188
-56
lines changed

docs/models/google.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,22 +214,22 @@ from pydantic_ai.models.google import GoogleModel, GoogleModelSettings
214214
settings = GoogleModelSettings(
215215
temperature=0.2,
216216
max_tokens=1024,
217-
google_thinking_config={'thinking_budget': 2048},
217+
google_thinking_config={'thinking_level': 'low'},
218218
google_safety_settings=[
219219
{
220220
'category': HarmCategory.HARM_CATEGORY_HATE_SPEECH,
221221
'threshold': HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
222222
}
223223
]
224224
)
225-
model = GoogleModel('gemini-2.5-flash')
225+
model = GoogleModel('gemini-2.5-pro')
226226
agent = Agent(model, model_settings=settings)
227227
...
228228
```
229229

230230
### Disable thinking
231231

232-
You can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
232+
On models older than Gemini 2.5 Pro, you can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
233233

234234
```python
235235
from pydantic_ai import Agent

pydantic_ai_slim/pydantic_ai/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
DocumentFormat,
4747
DocumentMediaType,
4848
DocumentUrl,
49+
FileOptions,
4950
FilePart,
5051
FileUrl,
5152
FinalResultEvent,
@@ -55,6 +56,7 @@
5556
HandleResponseEvent,
5657
ImageFormat,
5758
ImageMediaType,
59+
ImageOptions,
5860
ImageUrl,
5961
ModelMessage,
6062
ModelMessagesTypeAdapter,
@@ -82,6 +84,7 @@
8284
UserPromptPart,
8385
VideoFormat,
8486
VideoMediaType,
87+
VideoOptions,
8588
VideoUrl,
8689
)
8790
from .output import NativeOutput, PromptedOutput, StructuredDict, TextOutput, ToolOutput
@@ -146,6 +149,7 @@
146149
'DocumentFormat',
147150
'DocumentMediaType',
148151
'DocumentUrl',
152+
'FileOptions',
149153
'FileUrl',
150154
'FilePart',
151155
'FinalResultEvent',
@@ -155,6 +159,7 @@
155159
'HandleResponseEvent',
156160
'ImageFormat',
157161
'ImageMediaType',
162+
'ImageOptions',
158163
'ImageUrl',
159164
'BinaryImage',
160165
'ModelMessage',
@@ -183,6 +188,7 @@
183188
'UserPromptPart',
184189
'VideoFormat',
185190
'VideoMediaType',
191+
'VideoOptions',
186192
'VideoUrl',
187193
# profiles
188194
'ModelProfile',

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 109 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import pydantic_core
1414
from genai_prices import calc_price, types as genai_types
1515
from opentelemetry._events import Event # pyright: ignore[reportPrivateImportUsage]
16-
from typing_extensions import deprecated
16+
from typing_extensions import TypedDict, deprecated
1717

1818
from . import _otel_messages, _utils
1919
from ._utils import generate_tool_call_id as _generate_tool_call_id, now_utc as _now_utc
@@ -106,6 +106,61 @@ def _multi_modal_content_identifier(identifier: str | bytes) -> str:
106106
return hashlib.sha1(identifier).hexdigest()[:6]
107107

108108

109+
class FileOptions(TypedDict, total=False):
110+
"""Options for how the provider should process the file."""
111+
112+
pass
113+
114+
115+
class VideoOptions(TypedDict, total=False):
116+
"""Options for how the provider should process the video."""
117+
118+
detail: Literal['high', 'medium', 'low']
119+
"""The detail level of the video.
120+
121+
Supported by:
122+
123+
- Google: Maps to `media_resolution`. # TODO (DouweM): URL
124+
"""
125+
126+
fps: float
127+
"""The frame rate of the video sent to the model. If not specified, the default value will be 1.0. The fps range is (0.0, 24.0].
128+
129+
Supported by:
130+
131+
- Google: https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing
132+
"""
133+
134+
start_offset: str
135+
"""The start offset of the video sent to the model.
136+
137+
Supported by:
138+
139+
- Google: https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing
140+
"""
141+
142+
end_offset: str
143+
"""The end offset of the video sent to the model.
144+
145+
Supported by:
146+
147+
- Google: https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing
148+
"""
149+
150+
151+
class ImageOptions(TypedDict, total=False):
152+
"""Options for how the provider should process the image."""
153+
154+
detail: Literal['high', 'medium', 'low']
155+
"""The detail level of the image.
156+
157+
Supported by:
158+
159+
- OpenAI: Supports only `high` and `low`. https://platform.openai.com/docs/guides/images-vision?api-mode=responses#specify-image-input-detail-level
160+
- Google: Maps to `media_resolution`. # TODO (DouweM): URL
161+
"""
162+
163+
109164
@dataclass(init=False, repr=False)
110165
class FileUrl(ABC):
111166
"""Abstract base class for any URL-based file."""
@@ -122,13 +177,9 @@ class FileUrl(ABC):
122177
* If False, the URL is sent directly to the model and no download is performed.
123178
"""
124179

125-
vendor_metadata: dict[str, Any] | None = None
126-
"""Vendor-specific metadata for the file.
127-
128-
Supported by:
129-
- `GoogleModel`: `VideoUrl.vendor_metadata` is used as `video_metadata`: https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing
130-
- `OpenAIChatModel`, `OpenAIResponsesModel`: `ImageUrl.vendor_metadata['detail']` is used as `detail` setting for images
131-
"""
180+
# TODO (v2): Rename to `options`?
181+
vendor_metadata: FileOptions | None = None
182+
"""Options for how the provider should process the file."""
132183

133184
_media_type: Annotated[str | None, pydantic.Field(alias='media_type', default=None, exclude=True)] = field(
134185
compare=False, default=None
@@ -145,7 +196,7 @@ def __init__(
145196
media_type: str | None = None,
146197
identifier: str | None = None,
147198
force_download: bool = False,
148-
vendor_metadata: dict[str, Any] | None = None,
199+
vendor_metadata: FileOptions | None = None,
149200
) -> None:
150201
self.url = url
151202
self._media_type = media_type
@@ -209,7 +260,7 @@ def __init__(
209260
media_type: str | None = None,
210261
identifier: str | None = None,
211262
force_download: bool = False,
212-
vendor_metadata: dict[str, Any] | None = None,
263+
vendor_metadata: VideoOptions | None = None,
213264
kind: Literal['video-url'] = 'video-url',
214265
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
215266
_media_type: str | None = None,
@@ -285,7 +336,7 @@ def __init__(
285336
media_type: str | None = None,
286337
identifier: str | None = None,
287338
force_download: bool = False,
288-
vendor_metadata: dict[str, Any] | None = None,
339+
vendor_metadata: FileOptions | None = None,
289340
kind: Literal['audio-url'] = 'audio-url',
290341
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
291342
_media_type: str | None = None,
@@ -348,7 +399,7 @@ def __init__(
348399
media_type: str | None = None,
349400
identifier: str | None = None,
350401
force_download: bool = False,
351-
vendor_metadata: dict[str, Any] | None = None,
402+
vendor_metadata: ImageOptions | None = None,
352403
kind: Literal['image-url'] = 'image-url',
353404
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
354405
_media_type: str | None = None,
@@ -406,7 +457,7 @@ def __init__(
406457
media_type: str | None = None,
407458
identifier: str | None = None,
408459
force_download: bool = False,
409-
vendor_metadata: dict[str, Any] | None = None,
460+
vendor_metadata: FileOptions | None = None,
410461
kind: Literal['document-url'] = 'document-url',
411462
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
412463
_media_type: str | None = None,
@@ -476,12 +527,8 @@ class BinaryContent:
476527
media_type: AudioMediaType | ImageMediaType | DocumentMediaType | str
477528
"""The media type of the binary data."""
478529

479-
vendor_metadata: dict[str, Any] | None = None
480-
"""Vendor-specific metadata for the file.
481-
482-
Supported by:
483-
- `GoogleModel`: `BinaryContent.vendor_metadata` is used as `video_metadata`: https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing
484-
- `OpenAIChatModel`, `OpenAIResponsesModel`: `BinaryContent.vendor_metadata['detail']` is used as `detail` setting for images
530+
vendor_metadata: FileOptions | None = None
531+
"""Options for how the provider should process the file.
485532
"""
486533

487534
_identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field(
@@ -491,13 +538,52 @@ class BinaryContent:
491538
kind: Literal['binary'] = 'binary'
492539
"""Type identifier, this is available on all parts as a discriminator."""
493540

541+
@overload
542+
def __init__(
543+
self,
544+
data: bytes,
545+
*,
546+
media_type: ImageMediaType,
547+
identifier: str | None = None,
548+
vendor_metadata: ImageOptions | None = None,
549+
kind: Literal['binary'] = 'binary',
550+
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
551+
_identifier: str | None = None,
552+
) -> None: ...
553+
554+
@overload
555+
def __init__(
556+
self,
557+
data: bytes,
558+
*,
559+
media_type: VideoMediaType,
560+
identifier: str | None = None,
561+
vendor_metadata: VideoOptions | None = None,
562+
kind: Literal['binary'] = 'binary',
563+
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
564+
_identifier: str | None = None,
565+
) -> None: ...
566+
567+
@overload
568+
def __init__(
569+
self,
570+
data: bytes,
571+
*,
572+
media_type: AudioMediaType | DocumentMediaType | str,
573+
identifier: str | None = None,
574+
vendor_metadata: FileOptions | None = None,
575+
kind: Literal['binary'] = 'binary',
576+
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
577+
_identifier: str | None = None,
578+
) -> None: ...
579+
494580
def __init__(
495581
self,
496582
data: bytes,
497583
*,
498-
media_type: AudioMediaType | ImageMediaType | DocumentMediaType | str,
584+
media_type: AudioMediaType | ImageMediaType | VideoMediaType | DocumentMediaType | str,
499585
identifier: str | None = None,
500-
vendor_metadata: dict[str, Any] | None = None,
586+
vendor_metadata: FileOptions | None = None,
501587
kind: Literal['binary'] = 'binary',
502588
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
503589
_identifier: str | None = None,
@@ -516,7 +602,7 @@ def narrow_type(bc: BinaryContent) -> BinaryContent | BinaryImage:
516602
data=bc.data,
517603
media_type=bc.media_type,
518604
identifier=bc.identifier,
519-
vendor_metadata=bc.vendor_metadata,
605+
vendor_metadata=cast(ImageOptions, bc.vendor_metadata),
520606
)
521607
else:
522608
return bc
@@ -599,7 +685,7 @@ def __init__(
599685
*,
600686
media_type: str,
601687
identifier: str | None = None,
602-
vendor_metadata: dict[str, Any] | None = None,
688+
vendor_metadata: ImageOptions | None = None,
603689
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
604690
kind: Literal['binary'] = 'binary',
605691
_identifier: str | None = None,

0 commit comments

Comments
 (0)