Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions docs/models/anthropic.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ agent = Agent(model)
Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:

1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly

You can combine all three strategies for maximum savings:

Expand All @@ -96,8 +96,9 @@ agent = Agent(
'anthropic:claude-sonnet-4-5',
system_prompt='Detailed instructions...',
model_settings=AnthropicModelSettings(
# Use True for default 5m TTL, or specify '5m' / '1h' directly
anthropic_cache_instructions=True,
anthropic_cache_tool_definitions=True,
anthropic_cache_tool_definitions='1h', # Longer cache for tool definitions
),
)

Expand Down Expand Up @@ -134,7 +135,7 @@ agent = Agent(
'anthropic:claude-sonnet-4-5',
system_prompt='Instructions...',
model_settings=AnthropicModelSettings(
anthropic_cache_instructions=True
anthropic_cache_instructions=True # Default 5m TTL
),
)

Expand Down
7 changes: 7 additions & 0 deletions pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,13 @@ class CachePoint:
kind: Literal['cache-point'] = 'cache-point'
"""Type identifier, this is available on all parts as a discriminator."""

ttl: Literal['5m', '1h'] = '5m'
"""The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour).
Supported by:
* Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""


MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
UserContent: TypeAlias = str | MultiModalContent | CachePoint
Expand Down
26 changes: 17 additions & 9 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,19 +151,21 @@ class AnthropicModelSettings(ModelSettings, total=False):
See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
"""

anthropic_cache_tool_definitions: bool
anthropic_cache_tool_definitions: bool | Literal['5m', '1h']
"""Whether to add `cache_control` to the last tool definition.

When enabled, the last tool in the `tools` array will have `cache_control` set,
allowing Anthropic to cache tool definitions and reduce costs.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

anthropic_cache_instructions: bool
anthropic_cache_instructions: bool | Literal['5m', '1h']
"""Whether to add `cache_control` to the last system prompt block.

When enabled, the last system prompt will have `cache_control` set,
allowing Anthropic to cache system instructions and reduce costs.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

Expand Down Expand Up @@ -476,9 +478,11 @@ def _get_tools(
]

# Add cache_control to the last tool if enabled
if tools and model_settings.get('anthropic_cache_tool_definitions'):
if tools and (cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions')):
# If True, use '5m'; otherwise use the specified ttl value
ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs
last_tool = tools[-1]
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)

return tools

Expand Down Expand Up @@ -580,7 +584,7 @@ async def _map_message( # noqa: C901
elif isinstance(request_part, UserPromptPart):
async for content in self._map_user_prompt(request_part):
if isinstance(content, CachePoint):
self._add_cache_control_to_last_param(user_content_params)
self._add_cache_control_to_last_param(user_content_params, ttl=content.ttl)
else:
user_content_params.append(content)
elif isinstance(request_part, ToolReturnPart):
Expand Down Expand Up @@ -744,18 +748,22 @@ async def _map_message( # noqa: C901
system_prompt = '\n\n'.join(system_prompt_parts)

# If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
if system_prompt and model_settings.get('anthropic_cache_instructions'):
if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')):
# If True, use '5m'; otherwise use the specified ttl value
ttl: Literal['5m', '1h'] = '5m' if cache_instructions is True else cache_instructions
system_prompt_blocks = [
BetaTextBlockParam(
type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
type='text',
text=system_prompt,
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
)
]
return system_prompt_blocks, anthropic_messages

return system_prompt, anthropic_messages

@staticmethod
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None:
"""Add cache control to the last content block param.

See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
Expand All @@ -776,7 +784,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> Non
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')

# Add cache_control to the last param
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)

@staticmethod
async def _map_user_prompt(
Expand Down
54 changes: 46 additions & 8 deletions tests/models/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,11 @@ async def test_cache_point_adds_cache_control(allow_model_requests: None):
{
'role': 'user',
'content': [
{'text': 'Some context to cache', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
{
'text': 'Some context to cache',
'type': 'text',
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
},
{'text': 'Now the question', 'type': 'text'},
],
}
Expand All @@ -340,8 +344,8 @@ async def test_cache_point_multiple_markers(allow_model_requests: None):

assert content == snapshot(
[
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
{'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
{'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
{'text': 'Question', 'type': 'text'},
]
)
Expand Down Expand Up @@ -390,7 +394,7 @@ async def test_cache_point_with_image_content(allow_model_requests: None):
{
'source': {'type': 'url', 'url': 'https://example.com/image.jpg'},
'type': 'image',
'cache_control': {'type': 'ephemeral'},
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
},
{'text': 'What is in this image?', 'type': 'text'},
]
Expand Down Expand Up @@ -467,7 +471,7 @@ def tool_two() -> str: # pragma: no cover
'name': 'tool_two',
'description': '',
'input_schema': {'additionalProperties': False, 'properties': {}, 'type': 'object'},
'cache_control': {'type': 'ephemeral'},
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
},
]
)
Expand Down Expand Up @@ -497,7 +501,7 @@ async def test_anthropic_cache_instructions(allow_model_requests: None):
{
'type': 'text',
'text': 'This is a test system prompt with instructions.',
'cache_control': {'type': 'ephemeral'},
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
}
]
)
Expand Down Expand Up @@ -541,14 +545,48 @@ def my_tool(value: str) -> str: # pragma: no cover
'required': ['value'],
'type': 'object',
},
'cache_control': {'type': 'ephemeral'},
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
}
]
)
assert system == snapshot(
[{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral'}}]
[{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}]
)


async def test_anthropic_cache_with_custom_ttl(allow_model_requests: None):
    """Test that cache settings support custom TTL values ('5m' or '1h').

    Exercises both `anthropic_cache_tool_definitions` and
    `anthropic_cache_instructions` with explicit string TTLs (rather than
    `True`, which defaults to '5m') and verifies the TTL is forwarded into
    the `cache_control` payload sent to the Anthropic API.
    """
    # Canned response so no real API call is made.
    c = completion_message(
        [BetaTextBlock(text='Response', type='text')],
        usage=BetaUsage(input_tokens=10, output_tokens=5),
    )
    mock_client = MockAnthropic.create_mock(c)
    m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
    agent = Agent(
        m,
        system_prompt='System instructions to cache.',
        model_settings=AnthropicModelSettings(
            anthropic_cache_tool_definitions='1h',  # Custom 1h TTL
            anthropic_cache_instructions='5m',  # Explicit 5m TTL
        ),
    )

    # A tool must be registered so a `tools` array (and its cache_control)
    # is included in the request.
    @agent.tool_plain
    def my_tool(value: str) -> str:  # pragma: no cover
        return f'Result: {value}'

    await agent.run('test prompt')

    # Verify custom TTL values are applied to the captured request kwargs.
    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
    tools = completion_kwargs['tools']
    system = completion_kwargs['system']

    # Tool definitions should have 1h TTL
    assert tools[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'})
    # System instructions should have 5m TTL
    assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'})


async def test_async_request_text_response(allow_model_requests: None):
c = completion_message(
Expand Down