Skip to content

Commit b336cb1

Browse files
Wh1isper and DouweM authored
Add ttl to CachePoint and Anthropic caching model settings (#3450)
Co-authored-by: Douwe Maan <[email protected]>
1 parent ad82ce6 commit b336cb1

File tree

4 files changed

+75
-21
lines changed

4 files changed

+75
-21
lines changed

docs/models/anthropic.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ agent = Agent(model)
8383
Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:
8484

8585
1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
86-
2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
87-
3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
86+
2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
87+
3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
8888

8989
You can combine all three strategies for maximum savings:
9090

@@ -96,8 +96,9 @@ agent = Agent(
9696
'anthropic:claude-sonnet-4-5',
9797
system_prompt='Detailed instructions...',
9898
model_settings=AnthropicModelSettings(
99+
# Use True for default 5m TTL, or specify '5m' / '1h' directly
99100
anthropic_cache_instructions=True,
100-
anthropic_cache_tool_definitions=True,
101+
anthropic_cache_tool_definitions='1h', # Longer cache for tool definitions
101102
),
102103
)
103104

@@ -134,7 +135,7 @@ agent = Agent(
134135
'anthropic:claude-sonnet-4-5',
135136
system_prompt='Instructions...',
136137
model_settings=AnthropicModelSettings(
137-
anthropic_cache_instructions=True
138+
anthropic_cache_instructions=True # Default 5m TTL
138139
),
139140
)
140141

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,13 @@ class CachePoint:
627627
kind: Literal['cache-point'] = 'cache-point'
628628
"""Type identifier, this is available on all parts as a discriminator."""
629629

630+
ttl: Literal['5m', '1h'] = '5m'
631+
"""The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour).
632+
633+
Supported by:
634+
635+
* Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""
636+
630637

631638
MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
632639
UserContent: TypeAlias = str | MultiModalContent | CachePoint

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -151,19 +151,21 @@ class AnthropicModelSettings(ModelSettings, total=False):
151151
See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
152152
"""
153153

154-
anthropic_cache_tool_definitions: bool
154+
anthropic_cache_tool_definitions: bool | Literal['5m', '1h']
155155
"""Whether to add `cache_control` to the last tool definition.
156156
157157
When enabled, the last tool in the `tools` array will have `cache_control` set,
158158
allowing Anthropic to cache tool definitions and reduce costs.
159+
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
159160
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
160161
"""
161162

162-
anthropic_cache_instructions: bool
163+
anthropic_cache_instructions: bool | Literal['5m', '1h']
163164
"""Whether to add `cache_control` to the last system prompt block.
164165
165166
When enabled, the last system prompt will have `cache_control` set,
166167
allowing Anthropic to cache system instructions and reduce costs.
168+
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
167169
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
168170
"""
169171

@@ -476,9 +478,11 @@ def _get_tools(
476478
]
477479

478480
# Add cache_control to the last tool if enabled
479-
if tools and model_settings.get('anthropic_cache_tool_definitions'):
481+
if tools and (cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions')):
482+
# If True, use '5m'; otherwise use the specified ttl value
483+
ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs
480484
last_tool = tools[-1]
481-
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
485+
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
482486

483487
return tools
484488

@@ -580,7 +584,7 @@ async def _map_message( # noqa: C901
580584
elif isinstance(request_part, UserPromptPart):
581585
async for content in self._map_user_prompt(request_part):
582586
if isinstance(content, CachePoint):
583-
self._add_cache_control_to_last_param(user_content_params)
587+
self._add_cache_control_to_last_param(user_content_params, ttl=content.ttl)
584588
else:
585589
user_content_params.append(content)
586590
elif isinstance(request_part, ToolReturnPart):
@@ -744,18 +748,22 @@ async def _map_message( # noqa: C901
744748
system_prompt = '\n\n'.join(system_prompt_parts)
745749

746750
# If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
747-
if system_prompt and model_settings.get('anthropic_cache_instructions'):
751+
if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')):
752+
# If True, use '5m'; otherwise use the specified ttl value
753+
ttl: Literal['5m', '1h'] = '5m' if cache_instructions is True else cache_instructions
748754
system_prompt_blocks = [
749755
BetaTextBlockParam(
750-
type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
756+
type='text',
757+
text=system_prompt,
758+
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
751759
)
752760
]
753761
return system_prompt_blocks, anthropic_messages
754762

755763
return system_prompt, anthropic_messages
756764

757765
@staticmethod
758-
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
766+
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None:
759767
"""Add cache control to the last content block param.
760768
761769
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
@@ -776,7 +784,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> Non
776784
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')
777785

778786
# Add cache_control to the last param
779-
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
787+
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
780788

781789
@staticmethod
782790
async def _map_user_prompt(

tests/models/test_anthropic.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,11 @@ async def test_cache_point_adds_cache_control(allow_model_requests: None):
315315
{
316316
'role': 'user',
317317
'content': [
318-
{'text': 'Some context to cache', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
318+
{
319+
'text': 'Some context to cache',
320+
'type': 'text',
321+
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
322+
},
319323
{'text': 'Now the question', 'type': 'text'},
320324
],
321325
}
@@ -340,8 +344,8 @@ async def test_cache_point_multiple_markers(allow_model_requests: None):
340344

341345
assert content == snapshot(
342346
[
343-
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
344-
{'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
347+
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
348+
{'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
345349
{'text': 'Question', 'type': 'text'},
346350
]
347351
)
@@ -390,7 +394,7 @@ async def test_cache_point_with_image_content(allow_model_requests: None):
390394
{
391395
'source': {'type': 'url', 'url': 'https://example.com/image.jpg'},
392396
'type': 'image',
393-
'cache_control': {'type': 'ephemeral'},
397+
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
394398
},
395399
{'text': 'What is in this image?', 'type': 'text'},
396400
]
@@ -467,7 +471,7 @@ def tool_two() -> str: # pragma: no cover
467471
'name': 'tool_two',
468472
'description': '',
469473
'input_schema': {'additionalProperties': False, 'properties': {}, 'type': 'object'},
470-
'cache_control': {'type': 'ephemeral'},
474+
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
471475
},
472476
]
473477
)
@@ -497,7 +501,7 @@ async def test_anthropic_cache_instructions(allow_model_requests: None):
497501
{
498502
'type': 'text',
499503
'text': 'This is a test system prompt with instructions.',
500-
'cache_control': {'type': 'ephemeral'},
504+
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
501505
}
502506
]
503507
)
@@ -541,14 +545,48 @@ def my_tool(value: str) -> str: # pragma: no cover
541545
'required': ['value'],
542546
'type': 'object',
543547
},
544-
'cache_control': {'type': 'ephemeral'},
548+
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
545549
}
546550
]
547551
)
548552
assert system == snapshot(
549-
[{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral'}}]
553+
[{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}]
554+
)
555+
556+
557+
async def test_anthropic_cache_with_custom_ttl(allow_model_requests: None):
558+
"""Test that cache settings support custom TTL values ('5m' or '1h')."""
559+
c = completion_message(
560+
[BetaTextBlock(text='Response', type='text')],
561+
usage=BetaUsage(input_tokens=10, output_tokens=5),
562+
)
563+
mock_client = MockAnthropic.create_mock(c)
564+
m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
565+
agent = Agent(
566+
m,
567+
system_prompt='System instructions to cache.',
568+
model_settings=AnthropicModelSettings(
569+
anthropic_cache_tool_definitions='1h', # Custom 1h TTL
570+
anthropic_cache_instructions='5m', # Explicit 5m TTL
571+
),
550572
)
551573

574+
@agent.tool_plain
575+
def my_tool(value: str) -> str: # pragma: no cover
576+
return f'Result: {value}'
577+
578+
await agent.run('test prompt')
579+
580+
# Verify custom TTL values are applied
581+
completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
582+
tools = completion_kwargs['tools']
583+
system = completion_kwargs['system']
584+
585+
# Tool definitions should have 1h TTL
586+
assert tools[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'})
587+
# System instructions should have 5m TTL
588+
assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'})
589+
552590

553591
async def test_async_request_text_response(allow_model_requests: None):
554592
c = completion_message(

0 commit comments

Comments
 (0)