Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions docs/models/anthropic.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ agent = Agent(model)
Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:

1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly

You can combine all three strategies for maximum savings:

Expand All @@ -96,8 +96,9 @@ agent = Agent(
'anthropic:claude-sonnet-4-5',
system_prompt='Detailed instructions...',
model_settings=AnthropicModelSettings(
# Use True for default 5m TTL, or specify '5m' / '1h' directly
anthropic_cache_instructions=True,
anthropic_cache_tool_definitions=True,
anthropic_cache_tool_definitions='1h', # Longer cache for tool definitions
),
)

Expand Down Expand Up @@ -134,7 +135,7 @@ agent = Agent(
'anthropic:claude-sonnet-4-5',
system_prompt='Instructions...',
model_settings=AnthropicModelSettings(
anthropic_cache_instructions=True
anthropic_cache_instructions=True # Default 5m TTL
),
)

Expand Down
7 changes: 7 additions & 0 deletions pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,13 @@ class CachePoint:
kind: Literal['cache-point'] = 'cache-point'
"""Type identifier, this is available on all parts as a discriminator."""

ttl: Literal['5m', '1h'] = '5m'
"""The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour).
Supported by:
* Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""


MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
UserContent: TypeAlias = str | MultiModalContent | CachePoint
Expand Down
26 changes: 17 additions & 9 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,19 +151,21 @@ class AnthropicModelSettings(ModelSettings, total=False):
See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
"""

anthropic_cache_tool_definitions: bool
anthropic_cache_tool_definitions: bool | Literal['5m', '1h']
"""Whether to add `cache_control` to the last tool definition.

When enabled, the last tool in the `tools` array will have `cache_control` set,
allowing Anthropic to cache tool definitions and reduce costs.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

anthropic_cache_instructions: bool
anthropic_cache_instructions: bool | Literal['5m', '1h']
"""Whether to add `cache_control` to the last system prompt block.

When enabled, the last system prompt will have `cache_control` set,
allowing Anthropic to cache system instructions and reduce costs.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

Expand Down Expand Up @@ -476,9 +478,11 @@ def _get_tools(
]

# Add cache_control to the last tool if enabled
if tools and model_settings.get('anthropic_cache_tool_definitions'):
if tools and (cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions')):
# If True, use '5m'; otherwise use the specified ttl value
ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs
last_tool = tools[-1]
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)

return tools

Expand Down Expand Up @@ -580,7 +584,7 @@ async def _map_message( # noqa: C901
elif isinstance(request_part, UserPromptPart):
async for content in self._map_user_prompt(request_part):
if isinstance(content, CachePoint):
self._add_cache_control_to_last_param(user_content_params)
self._add_cache_control_to_last_param(user_content_params, ttl=content.ttl)
else:
user_content_params.append(content)
elif isinstance(request_part, ToolReturnPart):
Expand Down Expand Up @@ -744,18 +748,22 @@ async def _map_message( # noqa: C901
system_prompt = '\n\n'.join(system_prompt_parts)

# If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
if system_prompt and model_settings.get('anthropic_cache_instructions'):
if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')):
# If True, use '5m'; otherwise use the specified ttl value
ttl: Literal['5m', '1h'] = '5m' if cache_instructions is True else cache_instructions
system_prompt_blocks = [
BetaTextBlockParam(
type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
type='text',
text=system_prompt,
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
)
]
return system_prompt_blocks, anthropic_messages

return system_prompt, anthropic_messages

@staticmethod
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None:
"""Add cache control to the last content block param.

See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
Expand All @@ -776,7 +784,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> Non
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')

# Add cache_control to the last param
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)

@staticmethod
async def _map_user_prompt(
Expand Down
54 changes: 46 additions & 8 deletions tests/models/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,11 @@ async def test_cache_point_adds_cache_control(allow_model_requests: None):
{
'role': 'user',
'content': [
{'text': 'Some context to cache', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
{
'text': 'Some context to cache',
'type': 'text',
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
},
{'text': 'Now the question', 'type': 'text'},
],
}
Expand All @@ -340,8 +344,8 @@ async def test_cache_point_multiple_markers(allow_model_requests: None):

assert content == snapshot(
[
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
{'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
{'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
{'text': 'Question', 'type': 'text'},
]
)
Expand Down Expand Up @@ -390,7 +394,7 @@ async def test_cache_point_with_image_content(allow_model_requests: None):
{
'source': {'type': 'url', 'url': 'https://example.com/image.jpg'},
'type': 'image',
'cache_control': {'type': 'ephemeral'},
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
},
{'text': 'What is in this image?', 'type': 'text'},
]
Expand Down Expand Up @@ -467,7 +471,7 @@ def tool_two() -> str: # pragma: no cover
'name': 'tool_two',
'description': '',
'input_schema': {'additionalProperties': False, 'properties': {}, 'type': 'object'},
'cache_control': {'type': 'ephemeral'},
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
},
]
)
Expand Down Expand Up @@ -497,7 +501,7 @@ async def test_anthropic_cache_instructions(allow_model_requests: None):
{
'type': 'text',
'text': 'This is a test system prompt with instructions.',
'cache_control': {'type': 'ephemeral'},
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
}
]
)
Expand Down Expand Up @@ -541,14 +545,48 @@ def my_tool(value: str) -> str: # pragma: no cover
'required': ['value'],
'type': 'object',
},
'cache_control': {'type': 'ephemeral'},
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
}
]
)
assert system == snapshot(
[{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral'}}]
[{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}]
)


async def test_anthropic_cache_with_custom_ttl(allow_model_requests: None):
    """Test that cache settings support custom TTL values ('5m' or '1h').

    Exercises both `anthropic_cache_tool_definitions` and
    `anthropic_cache_instructions` with explicit string TTLs (rather than
    `True`, which defaults to '5m') and verifies the TTL is forwarded into
    the `cache_control` payload sent to the Anthropic API.
    """
    # Canned response so no real API call is made.
    c = completion_message(
        [BetaTextBlock(text='Response', type='text')],
        usage=BetaUsage(input_tokens=10, output_tokens=5),
    )
    mock_client = MockAnthropic.create_mock(c)
    m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
    agent = Agent(
        m,
        system_prompt='System instructions to cache.',
        model_settings=AnthropicModelSettings(
            anthropic_cache_tool_definitions='1h',  # Custom 1h TTL
            anthropic_cache_instructions='5m',  # Explicit 5m TTL
        ),
    )

    # A tool must be registered so a `tools` array (and its cache_control)
    # is included in the request.
    @agent.tool_plain
    def my_tool(value: str) -> str:  # pragma: no cover
        return f'Result: {value}'

    await agent.run('test prompt')

    # Verify custom TTL values are applied to the captured request kwargs.
    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
    tools = completion_kwargs['tools']
    system = completion_kwargs['system']

    # Tool definitions should have 1h TTL
    assert tools[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'})
    # System instructions should have 5m TTL
    assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'})


async def test_async_request_text_response(allow_model_requests: None):
c = completion_message(
Expand Down