diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index 586084ace0..96aa6207c1 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -83,8 +83,8 @@ agent = Agent(model) Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching: 1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it -2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt -3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions +2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly +3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly You can combine all three strategies for maximum savings: @@ -96,8 +96,9 @@ agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Detailed instructions...', model_settings=AnthropicModelSettings( + # Use True for default 5m TTL, or specify '5m' / '1h' directly anthropic_cache_instructions=True, - anthropic_cache_tool_definitions=True, + anthropic_cache_tool_definitions='1h', # Longer cache for tool definitions ), ) @@ -134,7 +135,7 @@ agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Instructions...', model_settings=AnthropicModelSettings( - anthropic_cache_instructions=True + anthropic_cache_instructions=True # Default 5m TTL ), ) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index c880c75a74..88aa3fb1e9 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -627,6 +627,13 @@ class CachePoint: kind: Literal['cache-point'] = 'cache-point' """Type identifier, this is available on all parts as a discriminator.""" + ttl: Literal['5m', '1h'] = '5m' + """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour). + + Supported by: + + * Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information.""" + MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent UserContent: TypeAlias = str | MultiModalContent | CachePoint diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index 6b5b3dfb2c..de33a08f7a 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -151,19 +151,21 @@ class AnthropicModelSettings(ModelSettings, total=False): See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information. """ - anthropic_cache_tool_definitions: bool + anthropic_cache_tool_definitions: bool | Literal['5m', '1h'] """Whether to add `cache_control` to the last tool definition. When enabled, the last tool in the `tools` array will have `cache_control` set, allowing Anthropic to cache tool definitions and reduce costs. + If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly. See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ - anthropic_cache_instructions: bool + anthropic_cache_instructions: bool | Literal['5m', '1h'] """Whether to add `cache_control` to the last system prompt block. When enabled, the last system prompt will have `cache_control` set, allowing Anthropic to cache system instructions and reduce costs. + If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly. See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ @@ -476,9 +478,11 @@ def _get_tools( ] # Add cache_control to the last tool if enabled - if tools and model_settings.get('anthropic_cache_tool_definitions'): + if tools and (cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions')): + # If True, use '5m'; otherwise use the specified ttl value + ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs last_tool = tools[-1] - last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral') + last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl) return tools @@ -580,7 +584,7 @@ async def _map_message( # noqa: C901 elif isinstance(request_part, UserPromptPart): async for content in self._map_user_prompt(request_part): if isinstance(content, CachePoint): - self._add_cache_control_to_last_param(user_content_params) + self._add_cache_control_to_last_param(user_content_params, ttl=content.ttl) else: user_content_params.append(content) elif isinstance(request_part, ToolReturnPart): @@ -744,10 +748,14 @@ async def _map_message( # noqa: C901 system_prompt = '\n\n'.join(system_prompt_parts) # If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control - if system_prompt and model_settings.get('anthropic_cache_instructions'): + if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')): + # If True, use '5m'; otherwise use the specified ttl value + ttl: Literal['5m', '1h'] = '5m' if cache_instructions is True else cache_instructions system_prompt_blocks = [ BetaTextBlockParam( - type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral') + type='text', + text=system_prompt, + cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl), ) ] return system_prompt_blocks, anthropic_messages @@ -755,7 +763,7 @@ async def _map_message( # noqa: C901 return system_prompt, anthropic_messages @staticmethod - def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None: + def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None: """Add cache control to the last content block param. See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. @@ -776,7 +784,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> Non raise UserError(f'Cache control not supported for param type: {last_param["type"]}') # Add cache_control to the last param - last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral') + last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl) @staticmethod async def _map_user_prompt( diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index f30c3272a2..86ba5a68d3 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -315,7 +315,11 @@ async def test_cache_point_adds_cache_control(allow_model_requests: None): { 'role': 'user', 'content': [ - {'text': 'Some context to cache', 'type': 'text', 'cache_control': {'type': 'ephemeral'}}, + { + 'text': 'Some context to cache', + 'type': 'text', + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, + }, {'text': 'Now the question', 'type': 'text'}, ], } @@ -340,8 +344,8 @@ async def test_cache_point_multiple_markers(allow_model_requests: None): assert content == snapshot( [ - {'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}}, - {'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}}, + {'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}, + {'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}, {'text': 'Question', 'type': 'text'}, ] ) @@ -390,7 +394,7 @@ async def test_cache_point_with_image_content(allow_model_requests: None): { 'source': {'type': 'url', 'url': 'https://example.com/image.jpg'}, 'type': 'image', - 'cache_control': {'type': 'ephemeral'}, + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, }, {'text': 'What is in this image?', 'type': 'text'}, ] @@ -467,7 +471,7 @@ def tool_two() -> str: # pragma: no cover 'name': 'tool_two', 'description': '', 'input_schema': {'additionalProperties': False, 'properties': {}, 'type': 'object'}, - 'cache_control': {'type': 'ephemeral'}, + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, }, ] ) @@ -497,7 +501,7 @@ async def test_anthropic_cache_instructions(allow_model_requests: None): { 'type': 'text', 'text': 'This is a test system prompt with instructions.', - 'cache_control': {'type': 'ephemeral'}, + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, } ] ) @@ -541,14 +545,48 @@ def my_tool(value: str) -> str: # pragma: no cover 'required': ['value'], 'type': 'object', }, - 'cache_control': {'type': 'ephemeral'}, + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, } ] ) assert system == snapshot( - [{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral'}}] + [{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}] + ) + + +async def test_anthropic_cache_with_custom_ttl(allow_model_requests: None): + """Test that cache settings support custom TTL values ('5m' or '1h').""" + c = completion_message( + [BetaTextBlock(text='Response', type='text')], + usage=BetaUsage(input_tokens=10, output_tokens=5), + ) + mock_client = MockAnthropic.create_mock(c) + m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client)) + agent = Agent( + m, + system_prompt='System instructions to cache.', + model_settings=AnthropicModelSettings( + anthropic_cache_tool_definitions='1h', # Custom 1h TTL + anthropic_cache_instructions='5m', # Explicit 5m TTL + ), ) + @agent.tool_plain + def my_tool(value: str) -> str: # pragma: no cover + return f'Result: {value}' + + await agent.run('test prompt') + + # Verify custom TTL values are applied + completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] + tools = completion_kwargs['tools'] + system = completion_kwargs['system'] + + # Tool definitions should have 1h TTL + assert tools[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'}) + # System instructions should have 5m TTL + assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'}) + async def test_async_request_text_response(allow_model_requests: None): c = completion_message(