diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md
index 96aa6207c1..aa5460bd0e 100644
--- a/docs/models/anthropic.md
+++ b/docs/models/anthropic.md
@@ -80,25 +80,45 @@ agent = Agent(model)
 
 ## Prompt Caching
 
-Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:
+Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides four ways to use prompt caching:
 
 1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
 2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
 3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
+4. **Cache the Last Message (Convenience)**: Set [`AnthropicModelSettings.anthropic_cache_messages`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_messages] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly to automatically cache the last message
 
-You can combine all three strategies for maximum savings:
+You can combine multiple strategies for maximum savings:
 
 ```python {test="skip"}
 from pydantic_ai import Agent, CachePoint, RunContext
 from pydantic_ai.models.anthropic import AnthropicModelSettings
 
+# Example 1: Use anthropic_cache_messages for automatic last-message caching
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='You are a helpful assistant.',
+    model_settings=AnthropicModelSettings(
+        anthropic_cache_messages=True,  # Automatically caches the last message
+    ),
+)
+
+async def main():
+    # The last message is automatically cached - no need for a manual CachePoint
+    result1 = await agent.run('What is the capital of France?')
+    print(f'Cache write: {result1.usage().cache_write_tokens}')
+
+    # Continuing the conversation keeps the cached message in the request,
+    # so the follow-up call can read from the cache
+    result2 = await agent.run(
+        'And the capital of Germany?',
+        message_history=result1.all_messages(),
+    )
+    print(f'Cache read: {result2.usage().cache_read_tokens}')
+
+# Example 2: Combine with other cache settings for comprehensive caching
 agent = Agent(
     'anthropic:claude-sonnet-4-5',
     system_prompt='Detailed instructions...',
     model_settings=AnthropicModelSettings(
-        # Use True for default 5m TTL, or specify '5m' / '1h' directly
-        anthropic_cache_instructions=True,
-        anthropic_cache_tool_definitions='1h',  # Longer cache for tool definitions
+        anthropic_cache_instructions=True,  # Cache system instructions
+        anthropic_cache_tool_definitions='1h',  # Cache tool definitions with 1h TTL
+        anthropic_cache_messages=True,  # Also cache the last message
     ),
 )
 
@@ -107,22 +127,25 @@ def search_docs(ctx: RunContext, query: str) -> str:
     """Search documentation."""
     return f'Results for {query}'
 
-async def main():
-    # First call - writes to cache
-    result1 = await agent.run([
+async def main():  # noqa: F811
+    # All three cache points are used: instructions, tools, and last message
+    result = await agent.run('Search for Python best practices')
+    print(result.output)
+
+# Example 3: Fine-grained control with manual CachePoint markers
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='Instructions...',
+)
+
+async def main():  # noqa: F811
+    # Manually control cache points for specific content blocks
+    result = await agent.run([
         'Long context from documentation...',
-        CachePoint(),
+        CachePoint(),  # Cache everything up to this point
         'First question'
     ])
-
-    # Subsequent calls - read from cache (90% cost reduction)
-    result2 = await agent.run([
-        'Long context from documentation...',  # Same content
-        CachePoint(),
-        'Second question'
-    ])
-    print(f'First: {result1.output}')
-    print(f'Second: {result2.output}')
+    print(result.output)
 ```
 
 Access cache usage statistics via `result.usage()`:
@@ -139,9 +162,98 @@ agent = Agent(
     ),
 )
 
-async def main():
+async def main():  # noqa: F811
     result = await agent.run('Your question')
     usage = result.usage()
     print(f'Cache write tokens: {usage.cache_write_tokens}')
     print(f'Cache read tokens: {usage.cache_read_tokens}')
 ```
+
+### Cache Point Limits
+
+Anthropic enforces a maximum of 4 cache points per request. Pydantic AI automatically manages this limit to ensure your requests always comply without errors.
+
+#### How Cache Points Are Allocated
+
+Cache points can be placed in three locations:
+
+1. **System Prompt**: Via the `anthropic_cache_instructions` setting (adds a cache point to the last system prompt block)
+2. **Tool Definitions**: Via the `anthropic_cache_tool_definitions` setting (adds a cache point to the last tool definition)
+3. **Messages**: Via `CachePoint` markers or the `anthropic_cache_messages` setting (adds cache points to message content)
+
+Each setting uses **at most 1 cache point**, but you can combine them:
+
+```python {test="skip"}
+from pydantic_ai import Agent, CachePoint
+from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+# Example: Using all 3 cache point sources
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='Detailed instructions...',
+    model_settings=AnthropicModelSettings(
+        anthropic_cache_instructions=True,  # 1 cache point
+        anthropic_cache_tool_definitions=True,  # 1 cache point
+        anthropic_cache_messages=True,  # 1 cache point
+    ),
+)
+
+@agent.tool_plain
+def my_tool() -> str:
+    return 'result'
+
+async def main():  # noqa: F811
+    # This uses 3 cache points (instructions + tools + last message),
+    # so you can add 1 more CachePoint marker before hitting the limit
+    result = await agent.run([
+        'Context', CachePoint(),  # 4th cache point - OK
+        'Question'
+    ])
+    print(result.output)
+    usage = result.usage()
+    print(f'Cache write tokens: {usage.cache_write_tokens}')
+    print(f'Cache read tokens: {usage.cache_read_tokens}')
+```
+
+#### Automatic Cache Point Limiting
+
+When cache points from all sources (settings + `CachePoint` markers) exceed 4, Pydantic AI automatically removes excess cache points from **older message content**, keeping the most recent ones:
+
+```python {test="skip"}
+from pydantic_ai import Agent, CachePoint
+from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='Instructions...',
+    model_settings=AnthropicModelSettings(
+        anthropic_cache_instructions=True,  # 1 cache point
+        anthropic_cache_tool_definitions=True,  # 1 cache point
+    ),
+)
+
+@agent.tool_plain
+def search() -> str:
+    return 'data'
+
+async def main():  # noqa: F811
+    # Already using 2 cache points (instructions + tools),
+    # so only 2 more CachePoint markers fit within the 4-point limit
+    result = await agent.run([
+        'Context 1', CachePoint(),  # Oldest - will be removed
+        'Context 2', CachePoint(),  # Will be kept (3rd point)
+        'Context 3', CachePoint(),  # Will be kept (4th point)
+        'Question'
+    ])
+    # Final cache points: instructions + tools + Context 2 + Context 3 = 4
+    print(result.output)
+    usage = result.usage()
+    print(f'Cache write tokens: {usage.cache_write_tokens}')
+    print(f'Cache read tokens: {usage.cache_read_tokens}')
+```
+
+**Key Points**:
+
+- System and tool cache points are **always preserved**
+- The cache point created by `anthropic_cache_messages` is **always preserved**, as it is the newest message cache point
+- Additional `CachePoint` markers in messages are removed from oldest to newest when the limit is exceeded
+- This ensures critical caching (instructions/tools) is maintained while still benefiting from message-level caching
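+
+To make the trimming rule concrete, here is a self-contained sketch of the budget logic. It is illustrative only - `kept_message_cache_points` is not part of the Pydantic AI API:
+
+```python
+MAX_CACHE_POINTS = 4  # Anthropic's per-request limit
+
+def kept_message_cache_points(settings_points: int, markers: list[str]) -> list[str]:
+    """Keep only the newest message cache points that fit in the remaining budget."""
+    budget = MAX_CACHE_POINTS - settings_points
+    return markers[-budget:] if budget > 0 else []
+
+# Two points are already used by instructions + tools,
+# so only the two newest of three CachePoint markers survive:
+print(kept_message_cache_points(2, ['Context 1', 'Context 2', 'Context 3']))
+#> ['Context 2', 'Context 3']
+```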
+ """ + @dataclass(init=False) class AnthropicModel(Model): @@ -333,7 +346,7 @@ async def _messages_create( tool_choice = self._infer_tool_choice(tools, model_settings, model_request_parameters) system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings) - + self._limit_cache_points(system_prompt, anthropic_messages, tools) try: extra_headers = self._map_extra_headers(beta_features, model_settings) @@ -376,7 +389,7 @@ async def _messages_count_tokens( tool_choice = self._infer_tool_choice(tools, model_settings, model_request_parameters) system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings) - + self._limit_cache_points(system_prompt, anthropic_messages, tools) try: extra_headers = self._map_extra_headers(beta_features, model_settings) @@ -747,6 +760,25 @@ async def _map_message( # noqa: C901 system_prompt_parts.insert(0, instructions) system_prompt = '\n\n'.join(system_prompt_parts) + # Add cache_control to the last message content if anthropic_cache_messages is enabled + if anthropic_messages and (cache_messages := model_settings.get('anthropic_cache_messages')): + ttl: Literal['5m', '1h'] = '5m' if cache_messages is True else cache_messages + m = anthropic_messages[-1] + content = m['content'] + if isinstance(content, str): + # Convert string content to list format with cache_control + m['content'] = [ # pragma: no cover + BetaTextBlockParam( + text=content, + type='text', + cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl), + ) + ] + else: + # Add cache_control to the last content block + content = cast(list[BetaContentBlockParam], content) + self._add_cache_control_to_last_param(content, ttl) + # If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')): # If True, use '5m'; otherwise use the specified ttl value @@ -762,6 +794,75 @@ async def _map_message( # noqa: C901 return system_prompt, anthropic_messages + @staticmethod + def _limit_cache_points( + system_prompt: str | list[BetaTextBlockParam], + anthropic_messages: list[BetaMessageParam], + tools: list[BetaToolUnionParam], + ) -> None: + """Limit the number of cache points in the request to Anthropic's maximum. + + Anthropic enforces a maximum of 4 cache points per request. This method ensures + compliance by counting existing cache points and removing excess ones from messages. + + Strategy: + 1. Count cache points in system_prompt (can be multiple if list of blocks) + 2. Count cache points in tools (can be in any position, not just last) + 3. Raise UserError if system + tools already exceed MAX_CACHE_POINTS + 4. Calculate remaining budget for message cache points + 5. Traverse messages from newest to oldest, keeping the most recent cache points + within the remaining budget + 6. Remove excess cache points from older messages to stay within limit + + Cache point priority (always preserved): + - System prompt cache points + - Tool definition cache points + - Message cache points (newest first, oldest removed if needed) + + Raises: + UserError: If system_prompt and tools combined already exceed MAX_CACHE_POINTS (4). + This indicates a configuration error that cannot be auto-fixed. 
+ """ + MAX_CACHE_POINTS = 4 + + # Count existing cache points in system prompt + used_cache_points = ( + sum(1 for block in system_prompt if 'cache_control' in cast(dict[str, Any], block)) + if isinstance(system_prompt, list) + else 0 + ) + + # Count existing cache points in tools (any tool may have cache_control) + # Note: cache_control can be in the middle of tools list if builtin tools are added after + for tool in tools: + if 'cache_control' in tool: + used_cache_points += 1 + + # Calculate remaining cache points budget for messages + remaining_budget = MAX_CACHE_POINTS - used_cache_points + if remaining_budget < 0: # pragma: no cover + raise UserError( + f'Too many cache points for Anthropic request. ' + f'System prompt and tool definitions already use {used_cache_points} cache points, ' + f'which exceeds the maximum of {MAX_CACHE_POINTS}.' + ) + # Remove excess cache points from messages (newest to oldest) + for message in reversed(anthropic_messages): + content = message['content'] + if isinstance(content, str): # pragma: no cover + continue + + # Process content blocks in reverse order (newest first) + for block in reversed(cast(list[BetaContentBlockParam], content)): + block_dict = cast(dict[str, Any], block) + + if 'cache_control' in block_dict: + if remaining_budget > 0: + remaining_budget -= 1 + else: + # Exceeded limit, remove this cache point + del block_dict['cache_control'] + @staticmethod def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None: """Add cache control to the last content block param. diff --git a/tests/models/cassettes/test_anthropic/test_anthropic_cache_messages_real_api.yaml b/tests/models/cassettes/test_anthropic/test_anthropic_cache_messages_real_api.yaml new file mode 100644 index 0000000000..a1711a107c --- /dev/null +++ b/tests/models/cassettes/test_anthropic/test_anthropic_cache_messages_real_api.yaml @@ -0,0 +1,327 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '5617' + content-type: + - application/json + host: + - api.anthropic.com + method: POST + parsed_body: + max_tokens: 4096 + messages: + - content: + - cache_control: + ttl: 5m + type: ephemeral + text: 'Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. 
Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. 
+ Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. ' + type: text + role: user + model: claude-sonnet-4-5 + stream: false + system: You are a helpful assistant. + uri: https://api.anthropic.com/v1/messages?beta=true + response: + headers: + connection: + - keep-alive + content-length: + - '1986' + content-type: + - application/json + retry-after: + - '3' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + content: + - text: |- + # What is Python? + + **Python** is a high-level, interpreted programming language created by Guido van Rossum and first released in 1991. It emphasizes code readability and simplicity, using clear syntax that often resembles plain English. + + ## Key Characteristics: + - **Easy to learn**: Simple, intuitive syntax ideal for beginners + - **Interpreted**: Code runs line-by-line without compilation + - **Dynamically typed**: No need to declare variable types + - **Versatile**: Supports multiple programming paradigms (procedural, object-oriented, functional) + - **Extensive libraries**: Vast ecosystem of packages and frameworks + + ## Main Use Cases: + + ### 1. **Web Development** + - Frameworks: Django, Flask, FastAPI + - Backend services and APIs + + ### 2. **Data Science & Analytics** + - Libraries: Pandas, NumPy, Matplotlib + - Data manipulation and visualization + + ### 3. **Machine Learning & AI** + - Frameworks: TensorFlow, PyTorch, scikit-learn + - Deep learning and predictive modeling + + ### 4. **Automation & Scripting** + - Task automation + - System administration + + ### 5. **Scientific Computing** + - Research and computational analysis + - Libraries: SciPy, SymPy + + ### 6. **Software Development** + - Application development + - Prototyping + + ### 7. **Cybersecurity** + - Penetration testing tools + - Security automation + + ### 8. **Game Development** + - Libraries: Pygame + - Prototyping game logic + + Python's versatility and ease of use make it one of the most popular programming languages worldwide, used by companies like Google, Netflix, NASA, and many others. 
+ type: text + id: msg_01FdUT99HgS9cAzcv4ztTDYJ + model: claude-sonnet-4-5-20250929 + role: assistant + stop_reason: end_turn + stop_sequence: null + type: message + usage: + cache_creation: + ephemeral_1h_input_tokens: 0 + ephemeral_5m_input_tokens: 1111 + cache_creation_input_tokens: 1111 + cache_read_input_tokens: 0 + input_tokens: 3 + output_tokens: 391 + service_tier: standard + status: + code: 200 + message: OK +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '7343' + content-type: + - application/json + host: + - api.anthropic.com + method: POST + parsed_body: + max_tokens: 4096 + messages: + - content: + - text: 'Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. 
Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its + main use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main + use cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use + cases. Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. + Please explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please + explain what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain + what Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what + Python is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python + is and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is + and its main use cases. Please explain what Python is and its main use cases. Please explain what Python is and + its main use cases. Please explain what Python is and its main use cases. ' + type: text + role: user + - content: + - text: |- + # What is Python? + + **Python** is a high-level, interpreted programming language created by Guido van Rossum and first released in 1991. It emphasizes code readability and simplicity, using clear syntax that often resembles plain English. + + ## Key Characteristics: + - **Easy to learn**: Simple, intuitive syntax ideal for beginners + - **Interpreted**: Code runs line-by-line without compilation + - **Dynamically typed**: No need to declare variable types + - **Versatile**: Supports multiple programming paradigms (procedural, object-oriented, functional) + - **Extensive libraries**: Vast ecosystem of packages and frameworks + + ## Main Use Cases: + + ### 1. **Web Development** + - Frameworks: Django, Flask, FastAPI + - Backend services and APIs + + ### 2. 
**Data Science & Analytics** + - Libraries: Pandas, NumPy, Matplotlib + - Data manipulation and visualization + + ### 3. **Machine Learning & AI** + - Frameworks: TensorFlow, PyTorch, scikit-learn + - Deep learning and predictive modeling + + ### 4. **Automation & Scripting** + - Task automation + - System administration + + ### 5. **Scientific Computing** + - Research and computational analysis + - Libraries: SciPy, SymPy + + ### 6. **Software Development** + - Application development + - Prototyping + + ### 7. **Cybersecurity** + - Penetration testing tools + - Security automation + + ### 8. **Game Development** + - Libraries: Pygame + - Prototyping game logic + + Python's versatility and ease of use make it one of the most popular programming languages worldwide, used by companies like Google, Netflix, NASA, and many others. + type: text + role: assistant + - content: + - cache_control: + ttl: 5m + type: ephemeral + text: Can you summarize that in one sentence? + type: text + role: user + model: claude-sonnet-4-5 + stream: false + system: You are a helpful assistant. + uri: https://api.anthropic.com/v1/messages?beta=true + response: + headers: + connection: + - keep-alive + content-length: + - '576' + content-type: + - application/json + retry-after: + - '56' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + content: + - text: Python is a beginner-friendly, versatile programming language widely used for web development, data science, + machine learning, automation, and scientific computing. + type: text + id: msg_01PYfyaNS7Rysss2xMNqQgy9 + model: claude-sonnet-4-5-20250929 + role: assistant + stop_reason: end_turn + stop_sequence: null + type: message + usage: + cache_creation: + ephemeral_1h_input_tokens: 0 + ephemeral_5m_input_tokens: 403 + cache_creation_input_tokens: 403 + cache_read_input_tokens: 1111 + input_tokens: 3 + output_tokens: 33 + service_tier: standard + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index 86ba5a68d3..a5408258de 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -588,6 +588,163 @@ def my_tool(value: str) -> str: # pragma: no cover assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'}) +async def test_anthropic_cache_messages(allow_model_requests: None): + """Test that anthropic_cache_messages caches only the last message.""" + c = completion_message( + [BetaTextBlock(text='Response', type='text')], + usage=BetaUsage(input_tokens=10, output_tokens=5), + ) + mock_client = MockAnthropic.create_mock(c) + m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client)) + agent = Agent( + m, + system_prompt='System instructions to cache.', + model_settings=AnthropicModelSettings( + anthropic_cache_messages=True, + ), + ) + + await agent.run('User message') + + # Verify only last message has cache_control, not system + completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] + system = completion_kwargs['system'] + messages = completion_kwargs['messages'] + + # System should NOT have cache_control (should be a plain string) + assert system == snapshot('System instructions to cache.') + + # Last message content should have cache_control + assert messages[-1]['content'][-1] == snapshot( + {'type': 'text', 'text': 'User message', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}} + ) + + +async def 
+async def test_anthropic_cache_messages_with_custom_ttl(allow_model_requests: None):
+    """Test that anthropic_cache_messages supports custom TTL values."""
+    c = completion_message(
+        [BetaTextBlock(text='Response', type='text')],
+        usage=BetaUsage(input_tokens=10, output_tokens=5),
+    )
+    mock_client = MockAnthropic.create_mock(c)
+    m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
+    agent = Agent(
+        m,
+        system_prompt='System instructions.',
+        model_settings=AnthropicModelSettings(
+            anthropic_cache_messages='1h',  # Custom 1h TTL
+        ),
+    )
+
+    await agent.run('User message')
+
+    # Verify that the 1h TTL is used
+    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
+    messages = completion_kwargs['messages']
+
+    assert messages[-1]['content'][-1]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'})
+
+
+async def test_limit_cache_points_with_cache_messages(allow_model_requests: None):
+    """Test that cache points are limited when using cache_messages + CachePoint markers."""
+    c = completion_message(
+        [BetaTextBlock(text='Response', type='text')],
+        usage=BetaUsage(input_tokens=10, output_tokens=5),
+    )
+    mock_client = MockAnthropic.create_mock(c)
+    m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
+    agent = Agent(
+        m,
+        system_prompt='System instructions.',
+        model_settings=AnthropicModelSettings(
+            anthropic_cache_messages=True,  # Uses 1 cache point
+        ),
+    )
+
+    # Add 4 CachePoint markers (the total would be 5: 1 from cache_messages + 4 from markers).
+    # Only the 3 newest CachePoint markers should be kept.
+    await agent.run(
+        [
+            'Context 1',
+            CachePoint(),  # Oldest, should be removed
+            'Context 2',
+            CachePoint(),  # Should be kept
+            'Context 3',
+            CachePoint(),  # Should be kept
+            'Context 4',
+            CachePoint(),  # Should be kept
+            'Question',
+        ]
+    )
+
+    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
+    messages = completion_kwargs['messages']
+
+    # Count cache_control occurrences in messages
+    cache_count = 0
+    for msg in messages:
+        for block in msg['content']:
+            if 'cache_control' in block:
+                cache_count += 1
+
+    # anthropic_cache_messages uses 1 cache point (last message only).
+    # With 4 CachePoint markers we'd have 5 in total; the limit is 4, so the
+    # oldest CachePoint is removed: 3 marker cache points + 1 from cache_messages = 4.
+    assert cache_count == 4
+
+
+async def test_limit_cache_points_all_settings(allow_model_requests: None):
+    """Test cache point limiting with all cache settings enabled."""
+    c = completion_message(
+        [BetaTextBlock(text='Response', type='text')],
+        usage=BetaUsage(input_tokens=10, output_tokens=5),
+    )
+    mock_client = MockAnthropic.create_mock(c)
+    m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
+
+    agent = Agent(
+        m,
+        system_prompt='System instructions.',
+        model_settings=AnthropicModelSettings(
+            anthropic_cache_instructions=True,  # 1 cache point
+            anthropic_cache_tool_definitions=True,  # 1 cache point
+        ),
+    )
+
+    @agent.tool_plain
+    def my_tool() -> str:  # pragma: no cover
+        return 'result'
+
+    # Add 3 CachePoint markers (the total would be 5: 2 from settings + 3 from markers).
+    # Only the 2 newest CachePoint markers should be kept.
+    await agent.run(
+        [
+            'Context 1',
+            CachePoint(),  # Oldest, should be removed
+            'Context 2',
+            CachePoint(),  # Should be kept
+            'Context 3',
+            CachePoint(),  # Should be kept
+            'Question',
+        ]
+    )
+
+    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
+    messages = completion_kwargs['messages']
+
+    # Count cache_control in messages (excluding system and tools)
+    cache_count = 0
+    for msg in messages:
+        for block in msg['content']:
+            if 'cache_control' in block:
+                cache_count += 1
+
+    # Should have exactly 2 cache points in messages
+    # (4 total - 1 system - 1 tool = 2 available for messages)
+    assert cache_count == 2
+
+
 async def test_async_request_text_response(allow_model_requests: None):
     c = completion_message(
         [BetaTextBlock(text='world', type='text')],
@@ -6516,3 +6673,45 @@ async def test_anthropic_bedrock_count_tokens_not_supported(env: TestEnv):
 
     with pytest.raises(UserError, match='AsyncAnthropicBedrock client does not support `count_tokens` api.'):
         await agent.run('hello', usage_limits=UsageLimits(input_tokens_limit=20, count_tokens_before_request=True))
+
+
+@pytest.mark.vcr()
+async def test_anthropic_cache_messages_real_api(allow_model_requests: None, anthropic_api_key: str):
+    """Test that the anthropic_cache_messages setting adds cache_control and produces cache usage metrics.
+
+    This test uses a cassette to verify the cache behavior without making real API calls in CI.
+    When run with real API credentials, it demonstrates that:
+    1. The first call with a long context creates a cache (cache_write_tokens > 0)
+    2. Follow-up messages in the same conversation can read from that cache (cache_read_tokens > 0)
+    """
+    m = AnthropicModel('claude-sonnet-4-5', provider=AnthropicProvider(api_key=anthropic_api_key))
+    agent = Agent(
+        m,
+        system_prompt='You are a helpful assistant.',
+        model_settings=AnthropicModelSettings(
+            anthropic_cache_messages=True,
+        ),
+    )
+
+    # First call with a longer message - this will cache the message content
+    result1 = await agent.run('Please explain what Python is and its main use cases. ' * 100)
+    usage1 = result1.usage()
+
+    # With anthropic_cache_messages, the first call should write a cache entry for the last message
+    # (cache_write_tokens > 0 indicates that caching occurred)
+    assert usage1.requests == 1
+    assert usage1.cache_write_tokens > 0
+    assert usage1.output_tokens > 0
+
+    # Continue the conversation - this message appends to the history,
+    # so the previously cached message is still part of the request
+    result2 = await agent.run('Can you summarize that in one sentence?', message_history=result1.all_messages())
+    usage2 = result2.usage()
+
+    # The second call reads from the cache for the unchanged prefix
+    # (cache_read_tokens > 0 on a cache hit) and writes a new cache entry
+    # for the new last message (cache_write_tokens > 0)
+    assert usage2.requests == 1
+    assert usage2.cache_read_tokens > 0
+    assert usage2.cache_write_tokens > 0
+    assert usage2.output_tokens > 0