Skip to content

Commit 0a07c55

Browse files
committed
Merge main into add-file-search-tools-support
Resolved conflicts: - Updated google-genai to 1.50.1 (from main) - Kept FileSearchTool tests and added new cache_point_filtering test - Regenerated uv.lock with latest dependencies
2 parents 977ab53 + 359c6d2 commit 0a07c55

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+3244
-690
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,8 @@ jobs:
202202
strategy:
203203
fail-fast: false
204204
matrix:
205-
python-version: ["3.10", "3.11", "3.12", "3.13"]
205+
# TODO(Marcelo): Enable 3.11 again.
206+
python-version: ["3.10", "3.12", "3.13"]
206207
env:
207208
CI: true
208209
COVERAGE_PROCESS_START: ./pyproject.toml

docs/.hooks/main.py

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616

1717
def on_page_markdown(markdown: str, page: Page, config: Config, files: Files) -> str:
1818
"""Called on each file after it is read and before it is converted to HTML."""
19-
markdown = inject_snippets(markdown, (DOCS_ROOT / page.file.src_uri).parent)
19+
relative_path_root = (DOCS_ROOT / page.file.src_uri).parent
20+
markdown = inject_snippets(markdown, relative_path_root)
2021
markdown = replace_uv_python_run(markdown)
2122
markdown = render_examples(markdown)
2223
markdown = render_video(markdown)
24+
markdown = create_gateway_toggle(markdown, relative_path_root)
2325
return markdown
2426

2527

@@ -39,6 +41,7 @@ def on_env(env: Environment, config: Config, files: Files) -> Environment:
3941

4042
def on_post_build(config: Config) -> None:
4143
"""Inject extra CSS into mermaid styles to avoid titles being the same color as the background in dark mode."""
44+
assert bundle_path is not None
4245
if bundle_path.exists():
4346
content = bundle_path.read_text()
4447
content, _ = re.subn(r'}(\.statediagram)', '}.statediagramTitleText{fill:#888}\1', content, count=1)
@@ -115,3 +118,109 @@ def sub_cf_video(m: re.Match[str]) -> str:
115118
></iframe>
116119
</div>
117120
"""
121+
122+
123+
def create_gateway_toggle(markdown: str, relative_path_root: Path) -> str:
    """Transform Python code blocks with Agent() calls to show both Pydantic AI and Gateway versions.

    Args:
        markdown: The page's markdown source.
        relative_path_root: Directory of the page being rendered; used to build a relative link
            to the Gateway docs page.

    Returns:
        The markdown with every fenced `py`/`python` block passed through
        `transform_gateway_code_block`.
    """
    # Pattern matches Python code blocks with or without attributes, and optional annotation definitions after.
    # Group 1 is the fence attributes, group 2 the code, group 3 the annotation definitions.
    # Annotation definitions are numbered list items like "1. Some text" that follow the code block.
    return re.sub(
        r'```py(?:thon)?(?: *\{?([^}\n]*)\}?)?\n(.*?)\n```(\n\n(?:\d+\..+?\n)+?\n)?',
        lambda m: transform_gateway_code_block(m, relative_path_root),
        markdown,
        flags=re.MULTILINE | re.DOTALL,
    )


# Models that should get gateway transformation
GATEWAY_MODELS = ('anthropic', 'openai', 'openai-responses', 'openai-chat', 'bedrock', 'google-vertex', 'groq')

# Finds "Agent(" followed by anything that is not a quote, then the first quoted string.
# Group 1 is the quote character, group 2 the string contents (the candidate model string).
AGENT_MODEL_PATTERN = re.compile(r'Agent\((?:(?!["\']).)*([\"\'])([^"\']+)\1', flags=re.DOTALL)

# Content nested inside a `=== "Tab"` block must be indented by this much.
TAB_INDENT = '    '


def is_gateway_model(model_string: str) -> bool:
    """Return whether `model_string` is a `<provider>:<name>` string for a gateway-supported provider."""
    return model_string.startswith(tuple(f'{model}:' for model in GATEWAY_MODELS))


def prefix_gateway_models(code: str) -> str:
    """Add the `gateway/` prefix to the model string of every Agent() call that uses a gateway model.

    Agent() calls whose first quoted string is not a gateway-supported model string (for example
    `Agent('test')` or `Agent(model, system_prompt='...')`) are left untouched.
    """

    def replace_agent_model(match: re.Match[str]) -> str:
        if not is_gateway_model(match.group(2)):
            return match.group(0)
        # Insert the prefix exactly where the model string starts inside the matched text.
        whole = match.group(0)
        model_offset = match.start(2) - match.start(0)
        return f'{whole[:model_offset]}gateway/{whole[model_offset:]}'

    return AGENT_MODEL_PATTERN.sub(replace_agent_model, code)


def indent_for_tab(text: str) -> str:
    """Indent every line of `text` (even empty ones) so it renders inside a content tab."""
    return '\n'.join(TAB_INDENT + line for line in text.split('\n'))


def transform_gateway_code_block(m: re.Match[str], relative_path_root: Path) -> str:
    """Transform a single code block to show both versions if it contains Agent() calls.

    Args:
        m: A match produced by the code block pattern in `create_gateway_toggle`
            (group 1: attributes, group 2: code, group 3: annotation definitions).
        relative_path_root: Directory of the page being rendered.

    Returns:
        The block re-emitted as a plain `python` fence when its first Agent() call does not use a
        gateway-supported model string; otherwise two content tabs, the first with `gateway/`
        prefixed model strings and a "Learn about Gateway" link in its title, the second with
        the original code.
    """
    attrs = m.group(1) or ''
    code = m.group(2)
    annotations = m.group(3) or ''  # Annotation definitions, if present

    attrs_str = f' {{{attrs}}}' if attrs else ''

    # Only blocks whose first Agent(...) call names a gateway-supported model get the toggle.
    first_agent = AGENT_MODEL_PATTERN.search(code)
    if first_agent is None or not is_gateway_model(first_agent.group(2)):
        return f'```python{attrs_str}\n{code}\n```{annotations}'

    gateway_code = prefix_gateway_models(code)

    # Link to the Gateway docs relative to the current page; as_posix() keeps it a valid URL on Windows.
    docs_path = DOCS_ROOT / 'gateway'
    relative_path = docs_path.relative_to(relative_path_root, walk_up=True)
    link = f"<a href='{relative_path.as_posix()}' style='float: right;'>Learn about Gateway</a>"

    if 'title="' in attrs:
        gateway_attrs = attrs.replace('title="', f'title="{link} ', 1)
    else:
        gateway_attrs = attrs + f' title="{link}"'
    gateway_attrs_str = f' {{{gateway_attrs}}}'

    # Indent code lines for proper markdown formatting within tabs.
    indented_code = indent_for_tab(code)
    indented_gateway_code = indent_for_tab(gateway_code)

    # Annotation definitions need to be inside the tabs for Material to associate them with the code.
    indented_annotations = ''
    if annotations:
        indented_annotations = '\n\n' + indent_for_tab(annotations.strip()) + '\n\n'

    return f"""\
=== "With Pydantic AI Gateway"

{TAB_INDENT}```python{gateway_attrs_str}
{indented_gateway_code}
{TAB_INDENT}```{indented_annotations}

=== "Directly to Provider API"

{TAB_INDENT}```python{attrs_str}
{indented_code}
{TAB_INDENT}```{indented_annotations}"""

docs/.overrides/main.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{% extends "base.html" %}
2+
3+
{% block announce %}
4+
<strong>
5+
<a href="/gateway">Pydantic AI Gateway</a> is now available! 🚀
6+
Enterprise-ready AI model routing: One key for all your models with real-time monitoring and budget control that works.
7+
</strong>
8+
{% endblock %}

docs/dependencies.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Pydantic AI uses a dependency injection system to provide data and services to your agent's [system prompts](agents.md#system-prompts), [tools](tools.md) and [output validators](output.md#output-validator-functions).
44

5-
Matching Pydantic AI's design philosophy, our dependency system tries to use existing best practice in Python development rather than inventing esoteric "magic", this should make dependencies type-safe, understandable easier to test and ultimately easier to deploy in production.
5+
Matching Pydantic AI's design philosophy, our dependency system tries to use existing best practice in Python development rather than inventing esoteric "magic". This should make dependencies type-safe, understandable, easier to test, and ultimately easier to deploy in production.
66

77
## Defining Dependencies
88

@@ -103,11 +103,11 @@ _(This example is complete, it can be run "as is" — you'll need to add `asynci
103103
[System prompt functions](agents.md#system-prompts), [function tools](tools.md) and [output validators](output.md#output-validator-functions) are all run in the async context of an agent run.
104104

105105
If these functions are not coroutines (e.g. `async def`) they are called with
106-
[`run_in_executor`][asyncio.loop.run_in_executor] in a thread pool, it's therefore marginally preferable
106+
[`run_in_executor`][asyncio.loop.run_in_executor] in a thread pool. It's therefore marginally preferable
107107
to use `async` methods where dependencies perform IO, although synchronous dependencies should work fine too.
108108

109109
!!! note "`run` vs. `run_sync` and Asynchronous vs. Synchronous dependencies"
110-
Whether you use synchronous or asynchronous dependencies, is completely independent of whether you use `run` or `run_sync``run_sync` is just a wrapper around `run` and agents are always run in an async context.
110+
Whether you use synchronous or asynchronous dependencies is completely independent of whether you use `run` or `run_sync` — `run_sync` is just a wrapper around `run` and agents are always run in an async context.
111111

112112
Here's the same example as above, but with a synchronous dependency:
113113

docs/gateway.md

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
---
2+
title: Pydantic AI Gateway
3+
status: new
4+
---
5+
6+
# Pydantic AI Gateway
7+
8+
**[Pydantic AI Gateway](https://pydantic.dev/ai-gateway)** (PAIG) is a unified interface for accessing multiple AI providers with a single key. Features include built-in OpenTelemetry observability, real-time cost monitoring, failover management, and native integration with the other tools in the [Pydantic stack](https://pydantic.dev/).
9+
10+
!!! note "Free while in Beta"
11+
The Pydantic AI Gateway is currently in Beta. You can bring your own key (BYOK) or buy inference through the Gateway (we will eat the card fee for now).
12+
13+
Sign up at [gateway.pydantic.dev](https://gateway.pydantic.dev/).
14+
15+
!!! question "Questions?"
16+
For questions and feedback, contact us on [Slack](https://logfire.pydantic.dev/docs/join-slack/).
17+
18+
## Documentation Integration
19+
20+
To help you get started with [Pydantic AI Gateway](https://gateway.pydantic.dev), some code examples in the Pydantic AI documentation include a "With Pydantic AI Gateway" tab, alongside a "Directly to Provider API" tab with the standard Pydantic AI model string. The main difference between them is that when using Gateway, model strings use the `gateway/` prefix.
21+
22+
## Key features
23+
24+
- **API key management**: access multiple LLM providers with a single Gateway key.
25+
- **Cost Limits**: set spending limits at project, user, and API key levels with daily, weekly, and monthly caps.
26+
- **BYOK and managed providers:** Bring your own API keys (BYOK) from LLM providers, or pay for inference directly through the platform.
27+
- **Multi-provider support:** Access models from OpenAI, Anthropic, Google Vertex, Groq, and AWS Bedrock. _More providers coming soon_.
28+
- **Backend observability:** Log every request through [Pydantic Logfire](https://pydantic.dev/logfire) or any OpenTelemetry backend (_coming soon_).
29+
- **Zero translation**: Unlike traditional AI gateways that translate everything to one common schema, PAIG allows requests to flow through directly in each provider's native format. This gives you immediate access to the new model features as soon as they are released.
30+
- **Open source with self-hosting**: PAIG's core is [open source](https://github.com/pydantic/pydantic-ai-gateway/) (under [AGPL-3.0](https://www.gnu.org/licenses/agpl-3.0.en.html)), allowing self-hosting with file-based configuration, instead of using the managed service.
31+
- **Enterprise ready**: Includes SSO (with OIDC support), granular permissions, and flexible deployment options. Deploy to your Cloudflare account, or run on-premises with our [consulting support](https://pydantic.dev/contact).
32+
33+
```python {title="hello_world.py"}
34+
from pydantic_ai import Agent
35+
36+
agent = Agent('gateway/openai:gpt-5')
37+
38+
result = agent.run_sync('Where does "hello world" come from?')
39+
print(result.output)
40+
"""
41+
The first known use of "hello, world" was in a 1974 textbook about the C programming language.
42+
"""
43+
```
44+
45+
## Quick Start
46+
47+
This section contains instructions on how to set up your account and run your app with Pydantic AI Gateway credentials.
48+
49+
### Create an account
50+
51+
Using your GitHub or Google account, sign in at [gateway.pydantic.dev](https://gateway.pydantic.dev).
52+
Choose a name for your organization (or accept the default). You will automatically be assigned the Admin role.
53+
54+
A default project will be created for you. You can choose to use it, or create a new one on the [Projects](https://gateway.pydantic.dev/admin/projects) page.
55+
56+
### Add **Providers**
57+
58+
There are two ways to use Providers in the Pydantic AI Gateway: you can bring your own key (BYOK) or buy inference through the platform.
59+
60+
#### Bringing your own API key (BYOK)
61+
62+
On the [Providers](https://gateway.pydantic.dev/admin/providers) page, fill in the form to add a provider.
63+
Paste your API key into the form under Credentials, and make sure to **select the Project that will be associated with this provider**.
64+
It is possible to add multiple keys from the same provider.
65+
66+
#### Use Built-in Providers
67+
68+
Go to the [Billing page](https://gateway.pydantic.dev/admin/billing), add a payment method, and purchase $15 in credits to activate built-in providers.
69+
This gives you single-key access to all available models from OpenAI, Anthropic, Google Vertex, AWS Bedrock, and Groq.
70+
71+
### Grant access to your team
72+
73+
On the [Users](https://gateway.pydantic.dev/admin/users) page, create an invitation and share the URL with your team to allow them to access the project.
74+
75+
### Create Gateway project keys
76+
77+
On the Keys page, Admins can create project keys which are not affected by spending limits.
78+
Users can only create personal keys, which inherit spending caps from both the User and Project levels, whichever is more restrictive.
79+
80+
## Usage
81+
82+
After setting up your account with the instructions above, you will be able to make an AI model request with the Pydantic AI Gateway.
83+
The code snippets below show how you can use PAIG with different frameworks and SDKs.
84+
You can add the `gateway/` prefix to the model string of any supported provider.
85+
86+
To use different models, change the model string `gateway/<api_format>:<model_name>` to other models offered by the supported providers.
87+
88+
Examples of providers and models that can be used are:
89+
90+
| **Provider** | **API Format** | **Example Model** |
91+
| --- |-----------------|------------------------------------------|
92+
| OpenAI | `openai` | `gateway/openai:gpt-5` |
93+
| Anthropic | `anthropic` | `gateway/anthropic:claude-sonnet-4-5` |
94+
| Google Vertex | `google-vertex` | `gateway/google-vertex:gemini-2.5-flash` |
95+
| Groq | `groq` | `gateway/groq:openai/gpt-oss-120b` |
96+
| AWS Bedrock | `bedrock` | `gateway/bedrock:amazon.nova-micro-v1:0` |
97+
98+
### Pydantic AI
99+
100+
Before you start, make sure you are on version 1.16 or later of `pydantic-ai`. To update to the latest version run:
101+
102+
=== "uv"
103+
104+
```bash
105+
uv sync -P pydantic-ai
106+
```
107+
108+
=== "pip"
109+
110+
```bash
111+
pip install -U pydantic-ai
112+
```
113+
114+
Set the `PYDANTIC_AI_GATEWAY_API_KEY` environment variable to your Gateway API key:
115+
116+
```bash
117+
export PYDANTIC_AI_GATEWAY_API_KEY="YOUR_PAIG_TOKEN"
118+
```
119+
120+
You can access multiple models with the same API key, as shown in the code snippet below.
121+
122+
```python {title="hello_world.py"}
123+
from pydantic_ai import Agent
124+
125+
agent = Agent('gateway/openai:gpt-5')
126+
127+
result = agent.run_sync('Where does "hello world" come from?')
128+
print(result.output)
129+
"""
130+
The first known use of "hello, world" was in a 1974 textbook about the C programming language.
131+
"""
132+
```
133+
134+
135+
### Claude Code
136+
137+
Before you start, log out of Claude Code using `/logout`.
138+
139+
Set your gateway credentials as environment variables:
140+
141+
```bash
142+
export ANTHROPIC_BASE_URL="https://gateway.pydantic.dev/proxy/anthropic"
143+
export ANTHROPIC_AUTH_TOKEN="YOUR_PAIG_TOKEN"
144+
```
145+
146+
Replace `YOUR_PAIG_TOKEN` with the API key from the Keys page.
147+
148+
Launch Claude Code by typing `claude`. All requests will now route through the Pydantic AI Gateway.
149+
150+
### SDKs
151+
152+
#### OpenAI SDK
153+
154+
```python {title="openai_sdk.py" test="skip"}
155+
import openai
156+
157+
client = openai.Client(
158+
base_url='https://gateway.pydantic.dev/proxy/chat/',
159+
api_key='paig_...',
160+
)
161+
162+
response = client.chat.completions.create(
163+
model='gpt-5',
164+
messages=[{'role': 'user', 'content': 'Hello world'}],
165+
)
166+
print(response.choices[0].message.content)
167+
#> Hello user
168+
```
169+
170+
#### Anthropic SDK
171+
172+
```python {title="anthropic_sdk.py" test="skip"}
173+
import anthropic
174+
175+
client = anthropic.Anthropic(
176+
base_url='https://gateway.pydantic.dev/proxy/anthropic/',
177+
auth_token='paig_...',
178+
)
179+
180+
response = client.messages.create(
181+
max_tokens=1000,
182+
model='claude-sonnet-4-5',
183+
messages=[{'role': 'user', 'content': 'Hello world'}],
184+
)
185+
print(response.content[0].text)
186+
#> Hello user
187+
```
188+
189+
## Troubleshooting
190+
191+
### Unable to calculate spend
192+
193+
The gateway needs to know the cost of the request in order to provide insights about the spend, and to enforce spending limits.
194+
If it's unable to calculate the cost, it will return a 400 error with the message "Unable to calculate spend".
195+
196+
When [configuring a provider](https://gateway.pydantic.dev/admin/providers/new), you need to decide if you want the gateway to block
197+
the API key if it's unable to calculate the cost. If you choose to block the API key, any further requests using that API key will fail.
198+
199+
We are actively working on supporting more providers and models.
200+
If you have a specific provider that you would like to see supported, please let us know on [Slack](https://logfire.pydantic.dev/docs/join-slack/) or [open an issue on `genai-prices`](https://github.com/pydantic/genai-prices/issues/new).

0 commit comments

Comments
 (0)