From b4862a0a329af0cad6e4ecb5127d2f5a38fa8bc7 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Wed, 19 Nov 2025 11:29:54 +0800 Subject: [PATCH 01/14] feat: add cache all and limit cache point in AnthropicModel --- docs/models/anthropic.md | 49 +++++- .../pydantic_ai/models/anthropic.py | 104 +++++++++++- tests/models/test_anthropic.py | 160 ++++++++++++++++++ 3 files changed, 309 insertions(+), 4 deletions(-) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index 96aa6207c1..ce9fa7b67c 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -80,18 +80,29 @@ agent = Agent(model) ## Prompt Caching -Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching: +Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides four ways to use prompt caching: 1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it 2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly 3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly +4. **Cache All (Convenience)**: Set [`AnthropicModelSettings.anthropic_cache_all`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_all] to `True` to automatically cache both system instructions and the last user message -You can combine all three strategies for maximum savings: +You can combine multiple strategies for maximum savings: ```python {test="skip"} from pydantic_ai import Agent, CachePoint, RunContext from pydantic_ai.models.anthropic import AnthropicModelSettings +# Option 1: Use anthropic_cache_all for convenience (caches system + last message) +agent = Agent( + 'anthropic:claude-sonnet-4-5', + system_prompt='Detailed instructions...', + model_settings=AnthropicModelSettings( + anthropic_cache_all=True, # Caches both system prompt and last message + ), +) + +# Option 2: Fine-grained control with individual settings agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Detailed instructions...', @@ -145,3 +156,37 @@ async def main(): print(f'Cache write tokens: {usage.cache_write_tokens}') print(f'Cache read tokens: {usage.cache_read_tokens}') ``` + +### Cache Point Limits + +Anthropic enforces a maximum of 4 cache points per request. Pydantic AI automatically manages this limit: + +- **`anthropic_cache_all`**: Uses 2 cache points (system instructions + last message) +- **`anthropic_cache_instructions`**: Uses 1 cache point +- **`anthropic_cache_tool_definitions`**: Uses 1 cache point +- **`CachePoint` markers**: Use remaining available cache points + +When the total exceeds 4 cache points, Pydantic AI automatically removes cache points from **older messages** (keeping the most recent ones), ensuring your requests always comply with Anthropic's limits without errors. 
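+
+Conceptually, the trimming pass walks the conversation from the newest content block to the oldest and drops the oldest cache markers first. A minimal sketch of that idea (the `trim_cache_points` helper below is hypothetical, shown for illustration only, with messages simplified to plain dicts):
+
+```python {test="skip"}
+def trim_cache_points(messages: list[dict], budget: int) -> None:
+    """Keep at most `budget` cache points, preferring the most recent ones."""
+    remaining = budget
+    for message in reversed(messages):  # newest message first
+        if isinstance(message['content'], str):
+            continue  # plain-text content carries no cache points
+        for block in reversed(message['content']):  # newest block first
+            if 'cache_control' in block:
+                if remaining > 0:
+                    remaining -= 1  # recent cache point: keep it
+                else:
+                    del block['cache_control']  # older cache point: drop it
+```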
+ +```python {test="skip"} +from pydantic_ai import Agent, CachePoint +from pydantic_ai.models.anthropic import AnthropicModelSettings + +agent = Agent( + 'anthropic:claude-sonnet-4-5', + system_prompt='Instructions...', + model_settings=AnthropicModelSettings( + anthropic_cache_all=True, # Uses 2 cache points + ), +) + +async def main(): + # Even with multiple CachePoint markers, only 2 more will be kept + # (4 total limit - 2 from cache_all = 2 available) + result = await agent.run([ + 'Context 1', CachePoint(), # Will be kept + 'Context 2', CachePoint(), # Will be kept + 'Context 3', CachePoint(), # Automatically removed (oldest) + 'Question' + ]) +``` diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index de33a08f7a..4c36c546dd 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -169,6 +169,22 @@ class AnthropicModelSettings(ModelSettings, total=False): See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ + anthropic_cache_all: bool | Literal['5m', '1h'] + """Convenience setting to enable caching for both system instructions and the last user message. + + When enabled, this automatically adds cache points to: + 1. The last system prompt block (system instructions) + 2. The last content block in the final user message + + This is equivalent to setting both `anthropic_cache_instructions` and adding a cache point + to the last message, but more convenient for common use cases. + If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly. + + Note: Uses 2 of Anthropic's 4 available cache points per request. Any additional CachePoint + markers in messages will be automatically limited to respect the 4-cache-point maximum. + See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. 
+ """ + @dataclass(init=False) class AnthropicModel(Model): @@ -478,7 +494,10 @@ def _get_tools( ] # Add cache_control to the last tool if enabled - if tools and (cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions')): + if tools and ( + cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions') + or model_settings.get('anthropic_cache_all') + ): # If True, use '5m'; otherwise use the specified ttl value ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs last_tool = tools[-1] @@ -747,8 +766,32 @@ async def _map_message( # noqa: C901 system_prompt_parts.insert(0, instructions) system_prompt = '\n\n'.join(system_prompt_parts) + # Add cache_control to the last message content if anthropic_cache_all is enabled + if anthropic_messages and (cache_all := model_settings.get('anthropic_cache_all')): + ttl: Literal['5m', '1h'] = '5m' if cache_all is True else cache_all + m = anthropic_messages[-1] + content = m['content'] + if isinstance(content, str): + # Convert string content to list format with cache_control + m['content'] = [ + { + 'text': content, + 'type': 'text', + 'cache_control': BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl), + } + ] + else: + # Add cache_control to the last content block + content = cast(list[BetaContentBlockParam], content) + self._add_cache_control_to_last_param(content, ttl) + + # Ensure total cache points don't exceed Anthropic's limit of 4 + self._limit_cache_points(anthropic_messages, model_settings) # If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control - if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')): + if system_prompt and ( + cache_instructions := model_settings.get('anthropic_cache_instructions') + or model_settings.get('anthropic_cache_all') + ): # If True, use '5m'; otherwise use the specified ttl value ttl: Literal['5m', '1h'] = '5m' if cache_instructions is True else cache_instructions system_prompt_blocks = [ @@ -762,6 +805,63 @@ async def _map_message( # noqa: C901 return system_prompt, anthropic_messages + @staticmethod + def _limit_cache_points(messages: list[BetaMessageParam], model_settings: AnthropicModelSettings) -> None: + """Limit the number of cache points in messages to comply with Anthropic's 4-cache-point maximum. + + Anthropic allows a maximum of 4 cache points per request. This method ensures compliance by: + 1. Calculating how many cache points are already used by system-level settings + (anthropic_cache_instructions, anthropic_cache_tool_definitions, anthropic_cache_all) + 2. Determining how many cache points remain available for message-level caching + 3. Traversing messages from newest to oldest, keeping only the allowed number of cache points + 4. Removing cache_control from older cache points that exceed the limit + + This prioritizes recent cache points, which are typically more valuable for conversation continuity. + + Args: + messages: List of message parameters to limit cache points in. + model_settings: Model settings containing cache configuration. 
+ """ + # Anthropic's maximum cache points per request + max_cache_points = 4 + used_cache_points = 0 + + # Calculate cache points used by system-level settings + if model_settings.get('anthropic_cache_all'): + # anthropic_cache_all adds cache points for both system instructions and last message + used_cache_points += 2 + else: + if model_settings.get('anthropic_cache_instructions'): + used_cache_points += 1 + if model_settings.get('anthropic_cache_tool_definitions'): + # Assume used one cache point for tool definitions + used_cache_points += 1 + + # Calculate remaining cache points available for message content + keep_cache_points = max_cache_points - used_cache_points + + # Traverse messages from back to front (newest to oldest) + remaining_cache_points = keep_cache_points + for message in reversed(messages): + content = message['content'] + # Skip if content is a string or None + if isinstance(content, str): + continue + content = cast(list[BetaContentBlockParam], content) + # Traverse content blocks from back to front within each message + for block in reversed(content): + # Cast to dict for TypedDict manipulation + block_dict = cast(dict[str, Any], block) + + # Check if this block has cache_control + if 'cache_control' in block_dict: + if remaining_cache_points > 0: + # Keep this cache point (within limit) + remaining_cache_points -= 1 + else: + # Remove cache_control as we've exceeded the limit + del block_dict['cache_control'] + @staticmethod def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None: """Add cache control to the last content block param. diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index 86ba5a68d3..4d0daddd42 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -588,6 +588,166 @@ def my_tool(value: str) -> str: # pragma: no cover assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'}) +async def test_anthropic_cache_all(allow_model_requests: None): + """Test that anthropic_cache_all caches both system instructions and last message.""" + c = completion_message( + [BetaTextBlock(text='Response', type='text')], + usage=BetaUsage(input_tokens=10, output_tokens=5), + ) + mock_client = MockAnthropic.create_mock(c) + m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client)) + agent = Agent( + m, + system_prompt='System instructions to cache.', + model_settings=AnthropicModelSettings( + anthropic_cache_all=True, + ), + ) + + await agent.run('User message') + + # Verify both system and last message have cache_control + completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] + system = completion_kwargs['system'] + messages = completion_kwargs['messages'] + + # System should have cache_control + assert system == snapshot( + [{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}] + ) + + # Last message content should have cache_control + assert messages[-1]['content'][-1] == snapshot( + {'type': 'text', 'text': 'User message', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}} + ) + + +async def test_anthropic_cache_all_with_custom_ttl(allow_model_requests: None): + """Test that anthropic_cache_all supports custom TTL values.""" + c = completion_message( + [BetaTextBlock(text='Response', type='text')], + usage=BetaUsage(input_tokens=10, output_tokens=5), + ) + mock_client = MockAnthropic.create_mock(c) + m = 
AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client)) + agent = Agent( + m, + system_prompt='System instructions.', + model_settings=AnthropicModelSettings( + anthropic_cache_all='1h', # Custom 1h TTL + ), + ) + + await agent.run('User message') + + # Verify both use 1h TTL + completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] + system = completion_kwargs['system'] + messages = completion_kwargs['messages'] + + assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'}) + assert messages[-1]['content'][-1]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'}) + + +async def test_limit_cache_points_with_cache_all(allow_model_requests: None): + """Test that cache points are limited when using cache_all + CachePoint markers.""" + c = completion_message( + [BetaTextBlock(text='Response', type='text')], + usage=BetaUsage(input_tokens=10, output_tokens=5), + ) + mock_client = MockAnthropic.create_mock(c) + m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client)) + agent = Agent( + m, + system_prompt='System instructions.', + model_settings=AnthropicModelSettings( + anthropic_cache_all=True, # Uses 2 cache points + ), + ) + + # Add 3 CachePoint markers (total would be 5: 2 from cache_all + 3 from markers) + # Only 2 CachePoint markers should be kept (newest ones) + await agent.run( + [ + 'Context 1', + CachePoint(), # Oldest, should be removed + 'Context 2', + CachePoint(), # Should be kept + 'Context 3', + CachePoint(), # Should be kept + 'Question', + ] + ) + + completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] + messages = completion_kwargs['messages'] + + # Count cache_control occurrences in messages + cache_count = 0 + for msg in messages: + for block in msg['content']: + if 'cache_control' in block: + cache_count += 1 + + # anthropic_cache_all uses 2 cache points (system + last message) + # With 3 CachePoint markers, we'd have 5 total + # Limit is 4, so 1 oldest CachePoint should be removed + # Result: 2 cache points in messages (from the 2 newest CachePoints) + # The cache_all's last message cache is applied after limiting + assert cache_count == 2 + + +async def test_limit_cache_points_all_settings(allow_model_requests: None): + """Test cache point limiting with all cache settings enabled.""" + c = completion_message( + [BetaTextBlock(text='Response', type='text')], + usage=BetaUsage(input_tokens=10, output_tokens=5), + ) + mock_client = MockAnthropic.create_mock(c) + m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client)) + + agent = Agent( + m, + system_prompt='System instructions.', + model_settings=AnthropicModelSettings( + anthropic_cache_instructions=True, # 1 cache point + anthropic_cache_tool_definitions=True, # 1 cache point + ), + ) + + @agent.tool_plain + def my_tool() -> str: # pragma: no cover + return 'result' + + # Add 3 CachePoint markers (total would be 5: 2 from settings + 3 from markers) + # Only 2 CachePoint markers should be kept + await agent.run( + [ + 'Context 1', + CachePoint(), # Oldest, should be removed + 'Context 2', + CachePoint(), # Should be kept + 'Context 3', + CachePoint(), # Should be kept + 'Question', + ] + ) + + completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] + messages = completion_kwargs['messages'] + + # Count cache_control in messages (excluding system and tools) + cache_count = 0 + for msg in messages: + for block in msg['content']: + if 
'cache_control' in block: + cache_count += 1 + + # Should have exactly 2 cache points in messages + # (4 total - 1 system - 1 tool = 2 available for messages) + assert cache_count == 2 + + async def test_async_request_text_response(allow_model_requests: None): c = completion_message( [BetaTextBlock(text='world', type='text')], From 9bf3f6ed5b2faa9e2a4ec6abbc455ee091c79c06 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Wed, 19 Nov 2025 11:41:29 +0800 Subject: [PATCH 02/14] fix ci issues --- docs/models/anthropic.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index ce9fa7b67c..ffc8487698 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -189,4 +189,5 @@ async def main(): 'Context 3', CachePoint(), # Automatically removed (oldest) 'Question' ]) + print(result.output) ``` From 0f0dd763dc82ece53a216253f7014e409f052f6d Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Wed, 19 Nov 2025 12:00:06 +0800 Subject: [PATCH 03/14] use BetaTextBlockParam and add nocover --- pydantic_ai_slim/pydantic_ai/models/anthropic.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index 4c36c546dd..abfa4f1d14 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -773,12 +773,12 @@ async def _map_message( # noqa: C901 content = m['content'] if isinstance(content, str): # Convert string content to list format with cache_control - m['content'] = [ - { - 'text': content, - 'type': 'text', - 'cache_control': BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl), - } + m['content'] = [ # pragma: no cover + BetaTextBlockParam( + text=content, + type='text', + cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl), + ) ] else: # Add cache_control to the last content block @@ -845,7 +845,7 @@ def _limit_cache_points(messages: list[BetaMessageParam], model_settings: Anthro for message in reversed(messages): content = message['content'] # Skip if content is a string or None - if isinstance(content, str): + if isinstance(content, str): # pragma: no cover continue content = cast(list[BetaContentBlockParam], content) # Traverse content blocks from back to front within each message From 8bf1d945c17da084b8e2c1f3b7a93d9f3f070af1 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Thu, 20 Nov 2025 11:26:04 +0800 Subject: [PATCH 04/14] use anthropic_cache_messages --- docs/models/anthropic.md | 74 ++++++++--- .../pydantic_ai/models/anthropic.py | 120 ++++++++---------- tests/models/test_anthropic.py | 45 +++---- 3 files changed, 132 insertions(+), 107 deletions(-) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index ffc8487698..b0fec0d324 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -85,7 +85,7 @@ Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-wit 1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it 2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly 3. 
**Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly -4. **Cache All (Convenience)**: Set [`AnthropicModelSettings.anthropic_cache_all`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_all] to `True` to automatically cache both system instructions and the last user message +4. **Cache Last Message (Convenience)**: Set [`AnthropicModelSettings.anthropic_cache_messages`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_messages] to `True` to automatically cache the last user message You can combine multiple strategies for maximum savings: @@ -93,12 +93,12 @@ You can combine multiple strategies for maximum savings: from pydantic_ai import Agent, CachePoint, RunContext from pydantic_ai.models.anthropic import AnthropicModelSettings -# Option 1: Use anthropic_cache_all for convenience (caches system + last message) +# Option 1: Use anthropic_cache_messages for convenience (caches last message only) agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Detailed instructions...', model_settings=AnthropicModelSettings( - anthropic_cache_all=True, # Caches both system prompt and last message + anthropic_cache_messages=True, # Caches the last user message ), ) @@ -159,35 +159,77 @@ async def main(): ### Cache Point Limits -Anthropic enforces a maximum of 4 cache points per request. Pydantic AI automatically manages this limit: +Anthropic enforces a maximum of 4 cache points per request. Pydantic AI automatically manages this limit to ensure your requests always comply without errors. -- **`anthropic_cache_all`**: Uses 2 cache points (system instructions + last message) -- **`anthropic_cache_instructions`**: Uses 1 cache point -- **`anthropic_cache_tool_definitions`**: Uses 1 cache point -- **`CachePoint` markers**: Use remaining available cache points +#### How Cache Points Are Allocated -When the total exceeds 4 cache points, Pydantic AI automatically removes cache points from **older messages** (keeping the most recent ones), ensuring your requests always comply with Anthropic's limits without errors. +Cache points can be placed in three locations: + +1. **System Prompt**: Via `anthropic_cache_instructions` setting (adds cache point to last system prompt block) +2. **Tool Definitions**: Via `anthropic_cache_tool_definitions` setting (adds cache point to last tool definition) +3. 
**Messages**: Via `CachePoint` markers or `anthropic_cache_messages` setting (adds cache points to message content) + +Each setting uses **at most 1 cache point**, but you can combine them: ```python {test="skip"} from pydantic_ai import Agent, CachePoint from pydantic_ai.models.anthropic import AnthropicModelSettings +# Example: Using all 3 cache point sources +agent = Agent( + 'anthropic:claude-sonnet-4-5', + system_prompt='Detailed instructions...', + model_settings=AnthropicModelSettings( + anthropic_cache_instructions=True, # 1 cache point + anthropic_cache_tool_definitions=True, # 1 cache point + anthropic_cache_messages=True, # 1 cache point + ), +) + +@agent.tool_plain +def my_tool() -> str: + return 'result' + +async def main(): + # This uses 3 cache points (instructions + tools + last message) + # You can add 1 more CachePoint marker before hitting the limit + result = await agent.run([ + 'Context', CachePoint(), # 4th cache point - OK + 'Question' + ]) +``` + +#### Automatic Cache Point Limiting + +When cache points from all sources (settings + `CachePoint` markers) exceed 4, Pydantic AI automatically removes excess cache points from **older message content** (keeping the most recent ones): + +```python {test="skip"} agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Instructions...', model_settings=AnthropicModelSettings( - anthropic_cache_all=True, # Uses 2 cache points + anthropic_cache_instructions=True, # 1 cache point + anthropic_cache_tool_definitions=True, # 1 cache point ), ) +@agent.tool_plain +def search() -> str: + return 'data' + async def main(): - # Even with multiple CachePoint markers, only 2 more will be kept - # (4 total limit - 2 from cache_all = 2 available) + # Already using 2 cache points (instructions + tools) + # Can add 2 more CachePoint markers (4 total limit) result = await agent.run([ - 'Context 1', CachePoint(), # Will be kept - 'Context 2', CachePoint(), # Will be kept - 'Context 3', CachePoint(), # Automatically removed (oldest) + 'Context 1', CachePoint(), # Oldest - will be removed + 'Context 2', CachePoint(), # Will be kept (3rd point) + 'Context 3', CachePoint(), # Will be kept (4th point) 'Question' ]) - print(result.output) + # Final cache points: instructions + tools + Context 2 + Context 3 = 4 ``` + +**Key Points**: +- System and tool cache points are **always preserved** +- Message cache points are removed from oldest to newest when limit is exceeded +- This ensures critical caching (instructions/tools) is maintained while still benefiting from message-level caching diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index abfa4f1d14..964bb620de 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -169,18 +169,15 @@ class AnthropicModelSettings(ModelSettings, total=False): See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ - anthropic_cache_all: bool | Literal['5m', '1h'] - """Convenience setting to enable caching for both system instructions and the last user message. + anthropic_cache_messages: bool | Literal['5m', '1h'] + """Convenience setting to enable caching for the last user message. - When enabled, this automatically adds cache points to: - 1. The last system prompt block (system instructions) - 2. 
The last content block in the final user message - - This is equivalent to setting both `anthropic_cache_instructions` and adding a cache point - to the last message, but more convenient for common use cases. + When enabled, this automatically adds a cache point to the last content block + in the final user message, which is useful for caching conversation history + or context in multi-turn conversations. If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly. - Note: Uses 2 of Anthropic's 4 available cache points per request. Any additional CachePoint + Note: Uses 1 of Anthropic's 4 available cache points per request. Any additional CachePoint markers in messages will be automatically limited to respect the 4-cache-point maximum. See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ @@ -349,7 +346,7 @@ async def _messages_create( tool_choice = self._infer_tool_choice(tools, model_settings, model_request_parameters) system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings) - + self._limit_cache_points(system_prompt, anthropic_messages, tools) try: extra_headers = self._map_extra_headers(beta_features, model_settings) @@ -392,7 +389,7 @@ async def _messages_count_tokens( tool_choice = self._infer_tool_choice(tools, model_settings, model_request_parameters) system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings) - + self._limit_cache_points(system_prompt, anthropic_messages, tools) try: extra_headers = self._map_extra_headers(beta_features, model_settings) @@ -494,10 +491,7 @@ def _get_tools( ] # Add cache_control to the last tool if enabled - if tools and ( - cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions') - or model_settings.get('anthropic_cache_all') - ): + if tools and (cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions')): # If True, use '5m'; otherwise use the specified ttl value ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs last_tool = tools[-1] @@ -766,9 +760,9 @@ async def _map_message( # noqa: C901 system_prompt_parts.insert(0, instructions) system_prompt = '\n\n'.join(system_prompt_parts) - # Add cache_control to the last message content if anthropic_cache_all is enabled - if anthropic_messages and (cache_all := model_settings.get('anthropic_cache_all')): - ttl: Literal['5m', '1h'] = '5m' if cache_all is True else cache_all + # Add cache_control to the last message content if anthropic_cache_messages is enabled + if anthropic_messages and (cache_messages := model_settings.get('anthropic_cache_messages')): + ttl: Literal['5m', '1h'] = '5m' if cache_messages is True else cache_messages m = anthropic_messages[-1] content = m['content'] if isinstance(content, str): @@ -785,13 +779,8 @@ async def _map_message( # noqa: C901 content = cast(list[BetaContentBlockParam], content) self._add_cache_control_to_last_param(content, ttl) - # Ensure total cache points don't exceed Anthropic's limit of 4 - self._limit_cache_points(anthropic_messages, model_settings) # If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control - if system_prompt and ( - cache_instructions := model_settings.get('anthropic_cache_instructions') - or model_settings.get('anthropic_cache_all') - ): + if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')): # If True, use '5m'; otherwise use the specified 
ttl value ttl: Literal['5m', '1h'] = '5m' if cache_instructions is True else cache_instructions system_prompt_blocks = [ @@ -806,60 +795,57 @@ async def _map_message( # noqa: C901 return system_prompt, anthropic_messages @staticmethod - def _limit_cache_points(messages: list[BetaMessageParam], model_settings: AnthropicModelSettings) -> None: - """Limit the number of cache points in messages to comply with Anthropic's 4-cache-point maximum. - - Anthropic allows a maximum of 4 cache points per request. This method ensures compliance by: - 1. Calculating how many cache points are already used by system-level settings - (anthropic_cache_instructions, anthropic_cache_tool_definitions, anthropic_cache_all) - 2. Determining how many cache points remain available for message-level caching - 3. Traversing messages from newest to oldest, keeping only the allowed number of cache points - 4. Removing cache_control from older cache points that exceed the limit + def _limit_cache_points( + system_prompt: str | list[BetaTextBlockParam], + anthropic_messages: list[BetaMessageParam], + tools: list[BetaToolUnionParam], + ) -> None: + """Limit the number of cache points in the request to Anthropic's maximum. + + Strategy: + 1. Keep the last cache point in system_prompt and tools (if present) + 2. Count cache points already used in system_prompt and tools + 3. Traverse messages from newest to oldest, keeping the most recent cache points + until the maximum limit is reached + """ + MAX_CACHE_POINTS = 4 - This prioritizes recent cache points, which are typically more valuable for conversation continuity. + # Count existing cache points in system prompt + used_cache_points = ( + sum(1 for block in system_prompt if 'cache_control' in cast(dict[str, Any], block)) + if isinstance(system_prompt, list) + else 0 + ) - Args: - messages: List of message parameters to limit cache points in. - model_settings: Model settings containing cache configuration. - """ - # Anthropic's maximum cache points per request - max_cache_points = 4 - used_cache_points = 0 - - # Calculate cache points used by system-level settings - if model_settings.get('anthropic_cache_all'): - # anthropic_cache_all adds cache points for both system instructions and last message - used_cache_points += 2 - else: - if model_settings.get('anthropic_cache_instructions'): - used_cache_points += 1 - if model_settings.get('anthropic_cache_tool_definitions'): - # Assume used one cache point for tool definitions + # Count existing cache points in tools (any tool may have cache_control) + # Note: cache_control can be in the middle of tools list if builtin tools are added after + for tool in tools: + if 'cache_control' in tool: used_cache_points += 1 - # Calculate remaining cache points available for message content - keep_cache_points = max_cache_points - used_cache_points - - # Traverse messages from back to front (newest to oldest) - remaining_cache_points = keep_cache_points - for message in reversed(messages): + # Calculate remaining cache points budget for messages + remaining_budget = MAX_CACHE_POINTS - used_cache_points + if remaining_budget < 0: # pragma: no cover + raise UserError( + f'Too many cache points for Anthropic request. ' + f'System prompt and tool definitions already use {used_cache_points} cache points, ' + f'which exceeds the maximum of {MAX_CACHE_POINTS}.' 
+ ) + # Remove excess cache points from messages (newest to oldest) + for message in reversed(anthropic_messages): content = message['content'] - # Skip if content is a string or None if isinstance(content, str): # pragma: no cover continue - content = cast(list[BetaContentBlockParam], content) - # Traverse content blocks from back to front within each message - for block in reversed(content): - # Cast to dict for TypedDict manipulation + + # Process content blocks in reverse order (newest first) + for block in reversed(cast(list[BetaContentBlockParam], content)): block_dict = cast(dict[str, Any], block) - # Check if this block has cache_control if 'cache_control' in block_dict: - if remaining_cache_points > 0: - # Keep this cache point (within limit) - remaining_cache_points -= 1 + if remaining_budget > 0: + remaining_budget -= 1 else: - # Remove cache_control as we've exceeded the limit + # Exceeded limit, remove this cache point del block_dict['cache_control'] @staticmethod diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index 4d0daddd42..a033f9cad3 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -588,8 +588,8 @@ def my_tool(value: str) -> str: # pragma: no cover assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'}) -async def test_anthropic_cache_all(allow_model_requests: None): - """Test that anthropic_cache_all caches both system instructions and last message.""" +async def test_anthropic_cache_messages(allow_model_requests: None): + """Test that anthropic_cache_messages caches only the last message.""" c = completion_message( [BetaTextBlock(text='Response', type='text')], usage=BetaUsage(input_tokens=10, output_tokens=5), @@ -600,21 +600,19 @@ async def test_anthropic_cache_all(allow_model_requests: None): m, system_prompt='System instructions to cache.', model_settings=AnthropicModelSettings( - anthropic_cache_all=True, + anthropic_cache_messages=True, ), ) await agent.run('User message') - # Verify both system and last message have cache_control + # Verify only last message has cache_control, not system completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] system = completion_kwargs['system'] messages = completion_kwargs['messages'] - # System should have cache_control - assert system == snapshot( - [{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}] - ) + # System should NOT have cache_control (should be a plain string) + assert system == snapshot('System instructions to cache.') # Last message content should have cache_control assert messages[-1]['content'][-1] == snapshot( @@ -622,8 +620,8 @@ async def test_anthropic_cache_all(allow_model_requests: None): ) -async def test_anthropic_cache_all_with_custom_ttl(allow_model_requests: None): - """Test that anthropic_cache_all supports custom TTL values.""" +async def test_anthropic_cache_messages_with_custom_ttl(allow_model_requests: None): + """Test that anthropic_cache_messages supports custom TTL values.""" c = completion_message( [BetaTextBlock(text='Response', type='text')], usage=BetaUsage(input_tokens=10, output_tokens=5), @@ -634,23 +632,21 @@ async def test_anthropic_cache_all_with_custom_ttl(allow_model_requests: None): m, system_prompt='System instructions.', model_settings=AnthropicModelSettings( - anthropic_cache_all='1h', # Custom 1h TTL + anthropic_cache_messages='1h', # Custom 1h TTL ), ) await agent.run('User message') - # Verify both use 1h TTL + # 
Verify use 1h TTL completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] - system = completion_kwargs['system'] messages = completion_kwargs['messages'] - assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'}) assert messages[-1]['content'][-1]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'}) -async def test_limit_cache_points_with_cache_all(allow_model_requests: None): - """Test that cache points are limited when using cache_all + CachePoint markers.""" +async def test_limit_cache_points_with_cache_messages(allow_model_requests: None): + """Test that cache points are limited when using cache_messages + CachePoint markers.""" c = completion_message( [BetaTextBlock(text='Response', type='text')], usage=BetaUsage(input_tokens=10, output_tokens=5), @@ -661,12 +657,12 @@ async def test_limit_cache_points_with_cache_all(allow_model_requests: None): m, system_prompt='System instructions.', model_settings=AnthropicModelSettings( - anthropic_cache_all=True, # Uses 2 cache points + anthropic_cache_messages=True, # Uses 1 cache point ), ) - # Add 3 CachePoint markers (total would be 5: 2 from cache_all + 3 from markers) - # Only 2 CachePoint markers should be kept (newest ones) + # Add 4 CachePoint markers (total would be 5: 1 from cache_messages + 4 from markers) + # Only 3 CachePoint markers should be kept (newest ones) await agent.run( [ 'Context 1', @@ -675,6 +671,8 @@ async def test_limit_cache_points_with_cache_all(allow_model_requests: None): CachePoint(), # Should be kept 'Context 3', CachePoint(), # Should be kept + 'Context 4', + CachePoint(), # Should be kept 'Question', ] ) @@ -689,12 +687,11 @@ async def test_limit_cache_points_with_cache_all(allow_model_requests: None): if 'cache_control' in block: cache_count += 1 - # anthropic_cache_all uses 2 cache points (system + last message) - # With 3 CachePoint markers, we'd have 5 total + # anthropic_cache_messages uses 1 cache point (last message only) + # With 4 CachePoint markers, we'd have 5 total # Limit is 4, so 1 oldest CachePoint should be removed - # Result: 2 cache points in messages (from the 2 newest CachePoints) - # The cache_all's last message cache is applied after limiting - assert cache_count == 2 + # Result: 3 cache points from CachePoint markers + 1 from cache_messages = 4 total + assert cache_count == 4 async def test_limit_cache_points_all_settings(allow_model_requests: None): From 240f71c566bc4925e31cf830f2c29ce2ac31ee7a Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Thu, 20 Nov 2025 11:28:11 +0800 Subject: [PATCH 05/14] fix ci --- docs/models/anthropic.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index b0fec0d324..a69353ffc5 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -197,6 +197,10 @@ async def main(): 'Context', CachePoint(), # 4th cache point - OK 'Question' ]) + print(result.output) + usage = result.usage() + print(f'Cache write tokens: {usage.cache_write_tokens}') + print(f'Cache read tokens: {usage.cache_read_tokens}') ``` #### Automatic Cache Point Limiting @@ -227,6 +231,10 @@ async def main(): 'Question' ]) # Final cache points: instructions + tools + Context 2 + Context 3 = 4 + print(result.output) + usage = result.usage() + print(f'Cache write tokens: {usage.cache_write_tokens}') + print(f'Cache read tokens: {usage.cache_read_tokens}') ``` **Key Points**: From ae63b134c3b180b3ee969721a1a59d1f03ebc21e Mon Sep 17 00:00:00 2001 From: Wh1isper 
<9573586@qq.com>
Date: Thu, 20 Nov 2025 11:33:31 +0800
Subject: [PATCH 06/14] update docstring for _limit_cache_points

---
 .../pydantic_ai/models/anthropic.py           | 23 +++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
index 964bb620de..2e4546fa2f 100644
--- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -802,11 +802,26 @@ def _limit_cache_points(
     ) -> None:
         """Limit the number of cache points in the request to Anthropic's maximum.
 
+        Anthropic enforces a maximum of 4 cache points per request. This method ensures
+        compliance by counting existing cache points and removing excess ones from messages.
+
         Strategy:
-        1. Keep the last cache point in system_prompt and tools (if present)
-        2. Count cache points already used in system_prompt and tools
-        3. Traverse messages from newest to oldest, keeping the most recent cache points
-            until the maximum limit is reached
+        1. Count cache points in system_prompt (there can be multiple if it is a list of blocks)
+        2. Count cache points in tools (they can be in any position, not just the last tool)
+        3. Raise UserError if system + tools already exceed MAX_CACHE_POINTS
+        4. Calculate remaining budget for message cache points
+        5. Traverse messages from newest to oldest, keeping the most recent cache points
+           within the remaining budget
+        6. Remove excess cache points from older messages to stay within limit
+
+        Cache point priority:
+        - System prompt cache points (always preserved)
+        - Tool definition cache points (always preserved)
+        - Message cache points (newest kept first, oldest removed if needed)
+
+        Raises:
+            UserError: If system_prompt and tools combined already exceed MAX_CACHE_POINTS (4).
+                This indicates a configuration error that cannot be auto-fixed. 
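+
+        Example:
+            With `anthropic_cache_instructions=True` (1 point) and
+            `anthropic_cache_tool_definitions=True` (1 point), the message budget
+            is 4 - 2 = 2, so only the 2 newest message cache points are kept and
+            any older ones have their `cache_control` removed.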
""" MAX_CACHE_POINTS = 4 From 6cceb59ccb9c001b098462770e7fc7d32c8acdfd Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Thu, 20 Nov 2025 11:40:49 +0800 Subject: [PATCH 07/14] fix doc example issue --- docs/models/anthropic.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index a69353ffc5..70727140f5 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -208,6 +208,8 @@ async def main(): When cache points from all sources (settings + `CachePoint` markers) exceed 4, Pydantic AI automatically removes excess cache points from **older message content** (keeping the most recent ones): ```python {test="skip"} +from pydantic_ai import Agent, CachePoint +from pydantic_ai.models.anthropic import AnthropicModelSettings agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Instructions...', From 0aa82ad995a1b64d0487e2edd59bf724dcd95574 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Thu, 20 Nov 2025 11:54:09 +0800 Subject: [PATCH 08/14] fix doc check --- docs/models/anthropic.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index 70727140f5..fe46435fb7 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -210,6 +210,7 @@ When cache points from all sources (settings + `CachePoint` markers) exceed 4, P ```python {test="skip"} from pydantic_ai import Agent, CachePoint from pydantic_ai.models.anthropic import AnthropicModelSettings + agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Instructions...', From 779bd40fb94a6cc86219e52d973a533c2fd07fb4 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Fri, 21 Nov 2025 11:04:13 +0800 Subject: [PATCH 09/14] update docs and add real case --- docs/models/anthropic.md | 53 +++++++++++++++++++++------------- tests/models/test_anthropic.py | 38 ++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index fe46435fb7..5e2cbae588 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -93,23 +93,32 @@ You can combine multiple strategies for maximum savings: from pydantic_ai import Agent, CachePoint, RunContext from pydantic_ai.models.anthropic import AnthropicModelSettings -# Option 1: Use anthropic_cache_messages for convenience (caches last message only) +# Example 1: Use anthropic_cache_messages for automatic last message caching agent = Agent( 'anthropic:claude-sonnet-4-5', - system_prompt='Detailed instructions...', + system_prompt='You are a helpful assistant.', model_settings=AnthropicModelSettings( - anthropic_cache_messages=True, # Caches the last user message + anthropic_cache_messages=True, # Automatically caches the last message ), ) -# Option 2: Fine-grained control with individual settings +async def main(): + # The last message is automatically cached - no need for manual CachePoint + result1 = await agent.run('What is the capital of France?') + + # Subsequent calls with similar conversation benefit from cache + result2 = await agent.run('What is the capital of Germany?') + print(f'Cache write: {result1.usage().cache_write_tokens}') + print(f'Cache read: {result2.usage().cache_read_tokens}') + +# Example 2: Combine with other cache settings for comprehensive caching agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Detailed instructions...', model_settings=AnthropicModelSettings( - # Use True for default 5m TTL, or specify '5m' / '1h' 
directly - anthropic_cache_instructions=True, - anthropic_cache_tool_definitions='1h', # Longer cache for tool definitions + anthropic_cache_instructions=True, # Cache system instructions + anthropic_cache_tool_definitions='1h', # Cache tool definitions with 1h TTL + anthropic_cache_messages=True, # Also cache the last message ), ) @@ -119,21 +128,24 @@ def search_docs(ctx: RunContext, query: str) -> str: return f'Results for {query}' async def main(): - # First call - writes to cache - result1 = await agent.run([ + # All three cache points are used: instructions, tools, and last message + result = await agent.run('Search for Python best practices') + print(result.output) + +# Example 3: Fine-grained control with manual CachePoint markers +agent = Agent( + 'anthropic:claude-sonnet-4-5', + system_prompt='Instructions...', +) + +async def main(): + # Manually control cache points for specific content blocks + result = await agent.run([ 'Long context from documentation...', - CachePoint(), + CachePoint(), # Cache everything up to this point 'First question' ]) - - # Subsequent calls - read from cache (90% cost reduction) - result2 = await agent.run([ - 'Long context from documentation...', # Same content - CachePoint(), - 'Second question' - ]) - print(f'First: {result1.output}') - print(f'Second: {result2.output}') + print(result.output) ``` Access cache usage statistics via `result.usage()`: @@ -242,5 +254,6 @@ async def main(): **Key Points**: - System and tool cache points are **always preserved** -- Message cache points are removed from oldest to newest when limit is exceeded +- The cache point created by `anthropic_cache_messages` is **always preserved** (as it's the newest message cache point) +- Additional `CachePoint` markers in messages are removed from oldest to newest when the limit is exceeded - This ensures critical caching (instructions/tools) is maintained while still benefiting from message-level caching diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index a033f9cad3..c6fa16d76f 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -6673,3 +6673,41 @@ async def test_anthropic_bedrock_count_tokens_not_supported(env: TestEnv): with pytest.raises(UserError, match='AsyncAnthropicBedrock client does not support `count_tokens` api.'): await agent.run('hello', usage_limits=UsageLimits(input_tokens_limit=20, count_tokens_before_request=True)) + + +@pytest.mark.vcr() +async def test_anthropic_cache_messages_real_api(allow_model_requests: None, anthropic_api_key: str): + """Test that anthropic_cache_messages setting adds cache_control and produces cache usage metrics. + + This test uses a cassette to verify the cache behavior without making real API calls in CI. + When run with real API credentials, it demonstrates that: + 1. The first call with a long context creates a cache (cache_write_tokens > 0) + 2. Follow-up messages in the same conversation can read from that cache (cache_read_tokens > 0) + """ + m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(api_key=anthropic_api_key)) + agent = Agent( + m, + system_prompt='You are a helpful assistant.', + model_settings=AnthropicModelSettings( + anthropic_cache_messages=True, + ), + ) + + # First call with a longer message - this will cache the message content + result1 = await agent.run('Please explain what Python is and its main use cases. 
' * 10) + usage1 = result1.usage() + + # With anthropic_cache_messages, the first call should write cache for the last message + # (Note: cache_write_tokens might be 0 if content is too short, but the setting is applied) + assert usage1.requests == 1 + assert usage1.output_tokens > 0 + + # Continue the conversation - this message appends to history + # The previous cached message should still be in the request + result2 = await agent.run('Can you summarize that in one sentence?', message_history=result1.all_messages()) + usage2 = result2.usage() + + # The second call should potentially read from cache if the previous message is still cached + # (cache_read_tokens > 0 when cache hit occurs) + assert usage2.requests == 1 + assert usage2.output_tokens > 0 From bf0dc8419a1956337131baacfd48a2a5d624a6db Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Fri, 21 Nov 2025 11:58:25 +0800 Subject: [PATCH 10/14] test via real api key --- ...est_anthropic_cache_messages_real_api.yaml | 327 ++++++++++++++++++ tests/models/test_anthropic.py | 10 +- 2 files changed, 334 insertions(+), 3 deletions(-) create mode 100644 tests/models/cassettes/test_anthropic/test_anthropic_cache_messages_real_api.yaml diff --git a/tests/models/cassettes/test_anthropic/test_anthropic_cache_messages_real_api.yaml b/tests/models/cassettes/test_anthropic/test_anthropic_cache_messages_real_api.yaml new file mode 100644 index 0000000000..a1711a107c --- /dev/null +++ b/tests/models/cassettes/test_anthropic/test_anthropic_cache_messages_real_api.yaml @@ -0,0 +1,327 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '5617' + content-type: + - application/json + host: + - api.anthropic.com + method: POST + parsed_body: + max_tokens: 4096 + messages: + - content: + - cache_control: + ttl: 5m + type: ephemeral + text: 'Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. 
Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. 
Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. ' + type: text + role: user + model: claude-sonnet-4-5 + stream: false + system: You are a helpful assistant. + uri: https://api.anthropic.com/v1/messages?beta=true + response: + headers: + connection: + - keep-alive + content-length: + - '1986' + content-type: + - application/json + retry-after: + - '3' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + content: + - text: |- + # What is Python? + + **Python** is a high-level, interpreted programming language created by Guido van Rossum and first released in 1991. It emphasizes code readability and simplicity, using clear syntax that often resembles plain English. + + ## Key Characteristics: + - **Easy to learn**: Simple, intuitive syntax ideal for beginners + - **Interpreted**: Code runs line-by-line without compilation + - **Dynamically typed**: No need to declare variable types + - **Versatile**: Supports multiple programming paradigms (procedural, object-oriented, functional) + - **Extensive libraries**: Vast ecosystem of packages and frameworks + + ## Main Use Cases: + + ### 1. **Web Development** + - Frameworks: Django, Flask, FastAPI + - Backend services and APIs + + ### 2. **Data Science & Analytics** + - Libraries: Pandas, NumPy, Matplotlib + - Data manipulation and visualization + + ### 3. **Machine Learning & AI** + - Frameworks: TensorFlow, PyTorch, scikit-learn + - Deep learning and predictive modeling + + ### 4. **Automation & Scripting** + - Task automation + - System administration + + ### 5. **Scientific Computing** + - Research and computational analysis + - Libraries: SciPy, SymPy + + ### 6. **Software Development** + - Application development + - Prototyping + + ### 7. **Cybersecurity** + - Penetration testing tools + - Security automation + + ### 8. **Game Development** + - Libraries: Pygame + - Prototyping game logic + + Python's versatility and ease of use make it one of the most popular programming languages worldwide, used by companies like Google, Netflix, NASA, and many others. + type: text + id: msg_01FdUT99HgS9cAzcv4ztTDYJ + model: claude-sonnet-4-5-20250929 + role: assistant + stop_reason: end_turn + stop_sequence: null + type: message + usage: + cache_creation: + ephemeral_1h_input_tokens: 0 + ephemeral_5m_input_tokens: 1111 + cache_creation_input_tokens: 1111 + cache_read_input_tokens: 0 + input_tokens: 3 + output_tokens: 391 + service_tier: standard + status: + code: 200 + message: OK +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '7343' + content-type: + - application/json + host: + - api.anthropic.com + method: POST + parsed_body: + max_tokens: 4096 + messages: + - content: + - text: 'Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. 
Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. 
Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. ' + type: text + role: user + - content: + - text: |- + # What is Python? + + **Python** is a high-level, interpreted programming language created by Guido van Rossum and first released in 1991. It emphasizes code readability and simplicity, using clear syntax that often resembles plain English. + + ## Key Characteristics: + - **Easy to learn**: Simple, intuitive syntax ideal for beginners + - **Interpreted**: Code runs line-by-line without compilation + - **Dynamically typed**: No need to declare variable types + - **Versatile**: Supports multiple programming paradigms (procedural, object-oriented, functional) + - **Extensive libraries**: Vast ecosystem of packages and frameworks + + ## Main Use Cases: + + ### 1. **Web Development** + - Frameworks: Django, Flask, FastAPI + - Backend services and APIs + + ### 2. **Data Science & Analytics** + - Libraries: Pandas, NumPy, Matplotlib + - Data manipulation and visualization + + ### 3. **Machine Learning & AI** + - Frameworks: TensorFlow, PyTorch, scikit-learn + - Deep learning and predictive modeling + + ### 4. **Automation & Scripting** + - Task automation + - System administration + + ### 5. **Scientific Computing** + - Research and computational analysis + - Libraries: SciPy, SymPy + + ### 6. **Software Development** + - Application development + - Prototyping + + ### 7. **Cybersecurity** + - Penetration testing tools + - Security automation + + ### 8. **Game Development** + - Libraries: Pygame + - Prototyping game logic + + Python's versatility and ease of use make it one of the most popular programming languages worldwide, used by companies like Google, Netflix, NASA, and many others. + type: text + role: assistant + - content: + - cache_control: + ttl: 5m + type: ephemeral + text: Can you summarize that in one sentence? 
+ type: text + role: user + model: claude-sonnet-4-5 + stream: false + system: You are a helpful assistant. + uri: https://api.anthropic.com/v1/messages?beta=true + response: + headers: + connection: + - keep-alive + content-length: + - '576' + content-type: + - application/json + retry-after: + - '56' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + content: + - text: Python is a beginner-friendly, versatile programming language widely used for web development, data science, + machine learning, automation, and scientific computing. + type: text + id: msg_01PYfyaNS7Rysss2xMNqQgy9 + model: claude-sonnet-4-5-20250929 + role: assistant + stop_reason: end_turn + stop_sequence: null + type: message + usage: + cache_creation: + ephemeral_1h_input_tokens: 0 + ephemeral_5m_input_tokens: 403 + cache_creation_input_tokens: 403 + cache_read_input_tokens: 1111 + input_tokens: 3 + output_tokens: 33 + service_tier: standard + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index c6fa16d76f..a5408258de 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -6684,7 +6684,7 @@ async def test_anthropic_cache_messages_real_api(allow_model_requests: None, ant 1. The first call with a long context creates a cache (cache_write_tokens > 0) 2. Follow-up messages in the same conversation can read from that cache (cache_read_tokens > 0) """ - m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(api_key=anthropic_api_key)) + m = AnthropicModel('claude-sonnet-4-5', provider=AnthropicProvider(api_key=anthropic_api_key)) agent = Agent( m, system_prompt='You are a helpful assistant.', @@ -6694,12 +6694,13 @@ async def test_anthropic_cache_messages_real_api(allow_model_requests: None, ant ) # First call with a longer message - this will cache the message content - result1 = await agent.run('Please explain what Python is and its main use cases. ' * 10) + result1 = await agent.run('Please explain what Python is and its main use cases. 
' * 100) usage1 = result1.usage() # With anthropic_cache_messages, the first call should write cache for the last message - # (Note: cache_write_tokens might be 0 if content is too short, but the setting is applied) + # (cache_write_tokens > 0 indicates that caching occurred) assert usage1.requests == 1 + assert usage1.cache_write_tokens > 0 assert usage1.output_tokens > 0 # Continue the conversation - this message appends to history @@ -6709,5 +6710,8 @@ async def test_anthropic_cache_messages_real_api(allow_model_requests: None, ant # The second call should potentially read from cache if the previous message is still cached # (cache_read_tokens > 0 when cache hit occurs) + # (cache_write_tokens > 0 as new message is added to cache) assert usage2.requests == 1 + assert usage2.cache_read_tokens > 0 + assert usage2.cache_write_tokens > 0 assert usage2.output_tokens > 0 From 7f317f0a34854ebb6a6da313b0823f2baf8b8337 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Fri, 21 Nov 2025 13:19:19 +0800 Subject: [PATCH 11/14] fix docs ruff issues --- docs/models/anthropic.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index 5e2cbae588..aa5460bd0e 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -127,7 +127,7 @@ def search_docs(ctx: RunContext, query: str) -> str: """Search documentation.""" return f'Results for {query}' -async def main(): +async def main(): # noqa: F811 # All three cache points are used: instructions, tools, and last message result = await agent.run('Search for Python best practices') print(result.output) @@ -138,7 +138,7 @@ agent = Agent( system_prompt='Instructions...', ) -async def main(): +async def main(): # noqa: F811 # Manually control cache points for specific content blocks result = await agent.run([ 'Long context from documentation...', @@ -162,7 +162,7 @@ agent = Agent( ), ) -async def main(): +async def main(): # noqa: F811 result = await agent.run('Your question') usage = result.usage() print(f'Cache write tokens: {usage.cache_write_tokens}') @@ -202,7 +202,7 @@ agent = Agent( def my_tool() -> str: return 'result' -async def main(): +async def main(): # noqa: F811 # This uses 3 cache points (instructions + tools + last message) # You can add 1 more CachePoint marker before hitting the limit result = await agent.run([ @@ -236,7 +236,7 @@ agent = Agent( def search() -> str: return 'data' -async def main(): +async def main(): # noqa: F811 # Already using 2 cache points (instructions + tools) # Can add 2 more CachePoint markers (4 total limit) result = await agent.run([ From 63500d77e60647594e9246096e91248be7cd7e2f Mon Sep 17 00:00:00 2001 From: Zhongsheng Ji <9573586@qq.com> Date: Sat, 22 Nov 2025 09:25:55 +0800 Subject: [PATCH 12/14] Update docs/models/anthropic.md Co-authored-by: Douwe Maan --- docs/models/anthropic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index aa5460bd0e..34417d6679 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -85,7 +85,7 @@ Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-wit 1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it 2. 
**Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
 3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
-4. **Cache Last Message (Convenience)**: Set [`AnthropicModelSettings.anthropic_cache_messages`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_messages] to `True` to automatically cache the last user message
+4. **Cache All Messages**: Set [`AnthropicModelSettings.anthropic_cache_messages`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_messages] to `True` to automatically cache all messages

 You can combine multiple strategies for maximum savings:

From 03dfa1986780b4cfef3191b93ff30df851bd83b9 Mon Sep 17 00:00:00 2001
From: Wh1isper <9573586@qq.com>
Date: Sat, 22 Nov 2025 09:26:37 +0800
Subject: [PATCH 13/14] use run_sync in docs

---
 docs/models/anthropic.md | 132 ++++++++++++++++++++++-----------------
 1 file changed, 75 insertions(+), 57 deletions(-)

diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md
index 34417d6679..fa4dce4a36 100644
--- a/docs/models/anthropic.md
+++ b/docs/models/anthropic.md
@@ -87,13 +87,14 @@ Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-wit
 3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
 4. 
**Cache All Messages**: Set [`AnthropicModelSettings.anthropic_cache_messages`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_messages] to `True` to automatically cache all messages -You can combine multiple strategies for maximum savings: +### Example 1: Automatic Last Message Caching + +Use `anthropic_cache_messages` to automatically cache the last user message: ```python {test="skip"} -from pydantic_ai import Agent, CachePoint, RunContext +from pydantic_ai import Agent from pydantic_ai.models.anthropic import AnthropicModelSettings -# Example 1: Use anthropic_cache_messages for automatic last message caching agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='You are a helpful assistant.', @@ -102,16 +103,23 @@ agent = Agent( ), ) -async def main(): - # The last message is automatically cached - no need for manual CachePoint - result1 = await agent.run('What is the capital of France?') +# The last message is automatically cached - no need for manual CachePoint +result1 = agent.run_sync('What is the capital of France?') + +# Subsequent calls with similar conversation benefit from cache +result2 = agent.run_sync('What is the capital of Germany?') +print(f'Cache write: {result1.usage().cache_write_tokens}') +print(f'Cache read: {result2.usage().cache_read_tokens}') +``` + +### Example 2: Comprehensive Caching Strategy - # Subsequent calls with similar conversation benefit from cache - result2 = await agent.run('What is the capital of Germany?') - print(f'Cache write: {result1.usage().cache_write_tokens}') - print(f'Cache read: {result2.usage().cache_read_tokens}') +Combine multiple cache settings for maximum savings: + +```python {test="skip"} +from pydantic_ai import Agent, RunContext +from pydantic_ai.models.anthropic import AnthropicModelSettings -# Example 2: Combine with other cache settings for comprehensive caching agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Detailed instructions...', @@ -127,27 +135,34 @@ def search_docs(ctx: RunContext, query: str) -> str: """Search documentation.""" return f'Results for {query}' -async def main(): # noqa: F811 - # All three cache points are used: instructions, tools, and last message - result = await agent.run('Search for Python best practices') - print(result.output) -# Example 3: Fine-grained control with manual CachePoint markers +result = agent.run_sync('Search for Python best practices') +print(result.output) +``` + +### Example 3: Fine-Grained Control with CachePoint + +Use manual `CachePoint` markers to control cache locations precisely: + +```python {test="skip"} +from pydantic_ai import Agent, CachePoint + agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Instructions...', ) -async def main(): # noqa: F811 - # Manually control cache points for specific content blocks - result = await agent.run([ - 'Long context from documentation...', - CachePoint(), # Cache everything up to this point - 'First question' - ]) - print(result.output) +# Manually control cache points for specific content blocks +result = agent.run_sync([ + 'Long context from documentation...', + CachePoint(), # Cache everything up to this point + 'First question' +]) +print(result.output) ``` +### Accessing Cache Usage Statistics + Access cache usage statistics via `result.usage()`: ```python {test="skip"} @@ -162,11 +177,10 @@ agent = Agent( ), ) -async def main(): # noqa: F811 - result = await agent.run('Your question') - usage = result.usage() - print(f'Cache write tokens: {usage.cache_write_tokens}') - 
print(f'Cache read tokens: {usage.cache_read_tokens}') +result = agent.run_sync('Your question') +usage = result.usage() +print(f'Cache write tokens: {usage.cache_write_tokens}') +print(f'Cache read tokens: {usage.cache_read_tokens}') ``` ### Cache Point Limits @@ -181,13 +195,16 @@ Cache points can be placed in three locations: 2. **Tool Definitions**: Via `anthropic_cache_tool_definitions` setting (adds cache point to last tool definition) 3. **Messages**: Via `CachePoint` markers or `anthropic_cache_messages` setting (adds cache points to message content) -Each setting uses **at most 1 cache point**, but you can combine them: +Each setting uses **at most 1 cache point**, but you can combine them. + +#### Example: Using All 3 Cache Point Sources + +Define an agent with all cache settings enabled: ```python {test="skip"} from pydantic_ai import Agent, CachePoint from pydantic_ai.models.anthropic import AnthropicModelSettings -# Example: Using all 3 cache point sources agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Detailed instructions...', @@ -202,22 +219,24 @@ agent = Agent( def my_tool() -> str: return 'result' -async def main(): # noqa: F811 - # This uses 3 cache points (instructions + tools + last message) - # You can add 1 more CachePoint marker before hitting the limit - result = await agent.run([ - 'Context', CachePoint(), # 4th cache point - OK - 'Question' - ]) - print(result.output) - usage = result.usage() - print(f'Cache write tokens: {usage.cache_write_tokens}') - print(f'Cache read tokens: {usage.cache_read_tokens}') + +# This uses 3 cache points (instructions + tools + last message) +# You can add 1 more CachePoint marker before hitting the limit +result = agent.run_sync([ + 'Context', CachePoint(), # 4th cache point - OK + 'Question' +]) +print(result.output) +usage = result.usage() +print(f'Cache write tokens: {usage.cache_write_tokens}') +print(f'Cache read tokens: {usage.cache_read_tokens}') ``` #### Automatic Cache Point Limiting -When cache points from all sources (settings + `CachePoint` markers) exceed 4, Pydantic AI automatically removes excess cache points from **older message content** (keeping the most recent ones): +When cache points from all sources (settings + `CachePoint` markers) exceed 4, Pydantic AI automatically removes excess cache points from **older message content** (keeping the most recent ones). 
+ +Define an agent with 2 cache points from settings: ```python {test="skip"} from pydantic_ai import Agent, CachePoint @@ -236,20 +255,19 @@ agent = Agent( def search() -> str: return 'data' -async def main(): # noqa: F811 - # Already using 2 cache points (instructions + tools) - # Can add 2 more CachePoint markers (4 total limit) - result = await agent.run([ - 'Context 1', CachePoint(), # Oldest - will be removed - 'Context 2', CachePoint(), # Will be kept (3rd point) - 'Context 3', CachePoint(), # Will be kept (4th point) - 'Question' - ]) - # Final cache points: instructions + tools + Context 2 + Context 3 = 4 - print(result.output) - usage = result.usage() - print(f'Cache write tokens: {usage.cache_write_tokens}') - print(f'Cache read tokens: {usage.cache_read_tokens}') +# Already using 2 cache points (instructions + tools) +# Can add 2 more CachePoint markers (4 total limit) +result = agent.run_sync([ + 'Context 1', CachePoint(), # Oldest - will be removed + 'Context 2', CachePoint(), # Will be kept (3rd point) + 'Context 3', CachePoint(), # Will be kept (4th point) + 'Question' +]) +# Final cache points: instructions + tools + Context 2 + Context 3 = 4 +print(result.output) +usage = result.usage() +print(f'Cache write tokens: {usage.cache_write_tokens}') +print(f'Cache read tokens: {usage.cache_read_tokens}') ``` **Key Points**: From 4f58aa8b0bdc8939c0301c67639446b812e70b26 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Tue, 25 Nov 2025 17:38:41 -0600 Subject: [PATCH 14/14] Update docs/models/anthropic.md --- docs/models/anthropic.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index fa4dce4a36..b3ed1dc9ed 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -87,9 +87,9 @@ Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-wit 3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly 4. **Cache All Messages**: Set [`AnthropicModelSettings.anthropic_cache_messages`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_messages] to `True` to automatically cache all messages -### Example 1: Automatic Last Message Caching +### Example 1: Automatic Message Caching -Use `anthropic_cache_messages` to automatically cache the last user message: +Use `anthropic_cache_messages` to automatically cache all messages up to and including the newest user message: ```python {test="skip"} from pydantic_ai import Agent
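
Taken together, the final docs state and the updated `test_anthropic_cache_messages_real_api` describe one end-to-end flow, which the cassette above records (cache write of 1111 tokens on the first request, cache read of 1111 tokens on the second). The following is a minimal sketch of that flow, not part of the patch series: it assumes a valid `ANTHROPIC_API_KEY` in the environment, and that `message_history` / `all_messages()` (Pydantic AI's standard conversation-continuation API) are unchanged by these patches.

```python
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
    'anthropic:claude-sonnet-4-5',
    system_prompt='You are a helpful assistant.',
    model_settings=AnthropicModelSettings(
        # Caches all messages up to and including the newest user message
        anthropic_cache_messages=True,
    ),
)

# First turn: the prompt is long enough to be cacheable, so this request
# should write the cache (cache_write_tokens > 0, per the test's assertion).
result1 = agent.run_sync('Please explain what Python is and its main use cases. ' * 100)
print(f'Cache write tokens: {result1.usage().cache_write_tokens}')

# Second turn continues the same conversation, so the previously cached
# prefix should be read back (cache_read_tokens > 0), while the newly
# appended messages are written to the cache (cache_write_tokens > 0).
result2 = agent.run_sync(
    'Can you summarize that in one sentence?',
    message_history=result1.all_messages(),
)
print(f'Cache read tokens: {result2.usage().cache_read_tokens}')
```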