From 25faefc062de2e7e967e18ca886b0c5f27c1e50b Mon Sep 17 00:00:00 2001 From: Denys Moskalenko Date: Sat, 15 Nov 2025 16:30:40 +0100 Subject: [PATCH 1/3] Bedrock: add prompt caching support and verification - Emit cache-point tool entries so Bedrock accepts cached tool definitions - Document and test prompt caching (writes + reads) with cassette-body checks - Refresh Bedrock cassettes and type annotations to align with the new flow --- docs/models/bedrock.md | 73 +++++++ pydantic_ai_slim/pydantic_ai/messages.py | 1 + .../pydantic_ai/models/bedrock.py | 62 +++++- ...edrock_cache_point_adds_cache_control.yaml | 103 ++++++++++ .../test_bedrock_cache_write_and_read.yaml | 148 ++++++++++++++ ...st_bedrock_model_usage_limit_exceeded.yaml | 8 +- tests/models/test_bedrock.py | 183 +++++++++++++++--- 7 files changed, 543 insertions(+), 35 deletions(-) create mode 100644 tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control.yaml create mode 100644 tests/models/cassettes/test_bedrock/test_bedrock_cache_write_and_read.yaml diff --git a/docs/models/bedrock.md b/docs/models/bedrock.md index 55cb47bf15..667ca487d4 100644 --- a/docs/models/bedrock.md +++ b/docs/models/bedrock.md @@ -74,6 +74,79 @@ model = BedrockConverseModel(model_name='us.amazon.nova-pro-v1:0') agent = Agent(model=model, model_settings=bedrock_model_settings) ``` +## Prompt Caching + +Bedrock supports [prompt caching](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html) on Anthropic models so you can reuse expensive context across requests. Pydantic AI exposes the same three strategies as Anthropic: + +1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker to cache everything before it in the current user message. +2. 
**Cache System Instructions**: Enable [`BedrockModelSettings.bedrock_cache_instructions`][pydantic_ai.models.bedrock.BedrockModelSettings.bedrock_cache_instructions] to append a cache point after the system prompt. +3. **Cache Tool Definitions**: Enable [`BedrockModelSettings.bedrock_cache_tool_definitions`][pydantic_ai.models.bedrock.BedrockModelSettings.bedrock_cache_tool_definitions] to cache your tool schemas. + +> [!NOTE] +> AWS only serves cached content once a segment crosses the provider-specific minimum token thresholds (see the [Bedrock prompt caching docs](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html)). Short prompts or tool definitions below those limits will bypass the cache, so don't expect savings for tiny payloads. + +You can combine all of them: + +```python +from pydantic_ai import Agent, CachePoint, RunContext +from pydantic_ai.models.bedrock import BedrockConverseModel, BedrockModelSettings + +model = BedrockConverseModel('us.anthropic.claude-sonnet-4-5-20250929-v1:0') +agent = Agent( + model, + system_prompt='Detailed instructions...', + model_settings=BedrockModelSettings( + bedrock_cache_instructions=True, + bedrock_cache_tool_definitions=True, + ), +) + + +@agent.tool +async def search_docs(ctx: RunContext, query: str) -> str: + return f'Results for {query}' + + +async def main(): + result1 = await agent.run( + [ + 'Long cached context...', + CachePoint(), + 'First question', + ] + ) + result2 = await agent.run( + [ + 'Long cached context...', + CachePoint(), + 'Second question', + ] + ) + print(result1.output, result1.usage()) + print(result2.output, result2.usage()) +``` + +Access cache usage statistics via [`RequestUsage`][pydantic_ai.usage.RequestUsage]: + +```python +from pydantic_ai import Agent, CachePoint + +agent = Agent('bedrock:us.anthropic.claude-sonnet-4-5-20250929-v1:0') + + +async def main(): + result = await agent.run( + [ + 'Reference material...', + CachePoint(), + 'What changed since last 
time?', + ] + ) + usage = result.usage() + print(f'Cache writes: {usage.cache_write_tokens}') + print(f'Cache reads: {usage.cache_read_tokens}') +``` + ## `provider` argument You can provide a custom `BedrockProvider` via the `provider` argument. This is useful when you want to specify credentials directly or use a custom boto3 client: diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 1f3b5cd6e5..3d37b32091 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -622,6 +622,7 @@ class CachePoint: Supported by: - Anthropic + - Amazon Bedrock (Converse API) """ kind: Literal['cache-point'] = 'cache-point' diff --git a/pydantic_ai_slim/pydantic_ai/models/bedrock.py b/pydantic_ai_slim/pydantic_ai/models/bedrock.py index acb98e5ec0..b2fcecaffd 100644 --- a/pydantic_ai_slim/pydantic_ai/models/bedrock.py +++ b/pydantic_ai_slim/pydantic_ai/models/bedrock.py @@ -208,6 +208,21 @@ class BedrockModelSettings(ModelSettings, total=False): See more about it on . """ + bedrock_cache_tool_definitions: bool + """Whether to add a cache point after the last tool definition. + + When enabled, a `cachePoint` entry is appended after the last tool in the `tools` array, letting Bedrock cache tool + definitions and reduce costs for compatible models. + See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html for more information. + """ + + bedrock_cache_instructions: bool + """Whether to add a cache point after the system prompt blocks. + + When enabled, an extra `cachePoint` is appended to the system prompt so Bedrock can cache system instructions. + See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html for more information.
+ """ + @dataclass(init=False) class BedrockConverseModel(Model): @@ -299,7 +314,8 @@ async def count_tokens( Check the actual supported models on """ model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters) - system_prompt, bedrock_messages = await self._map_messages(messages, model_request_parameters) + settings = cast(BedrockModelSettings, model_settings or {}) + system_prompt, bedrock_messages = await self._map_messages(messages, model_request_parameters, settings) params: CountTokensRequestTypeDef = { 'modelId': self._remove_inference_geo_prefix(self.model_name), 'input': { @@ -374,6 +390,8 @@ async def _process_response(self, response: ConverseResponseTypeDef) -> ModelRes u = usage.RequestUsage( input_tokens=response['usage']['inputTokens'], output_tokens=response['usage']['outputTokens'], + cache_read_tokens=response['usage'].get('cacheReadInputTokens', 0), + cache_write_tokens=response['usage'].get('cacheWriteInputTokens', 0), ) response_id = response.get('ResponseMetadata', {}).get('RequestId', None) raw_finish_reason = response['stopReason'] @@ -417,8 +435,9 @@ async def _messages_create( model_settings: BedrockModelSettings | None, model_request_parameters: ModelRequestParameters, ) -> ConverseResponseTypeDef | ConverseStreamResponseTypeDef: - system_prompt, bedrock_messages = await self._map_messages(messages, model_request_parameters) - inference_config = self._map_inference_config(model_settings) + settings = model_settings or BedrockModelSettings() + system_prompt, bedrock_messages = await self._map_messages(messages, model_request_parameters, settings) + inference_config = self._map_inference_config(settings) params: ConverseRequestTypeDef = { 'modelId': self.model_name, @@ -427,7 +446,7 @@ async def _messages_create( 'inferenceConfig': inference_config, } - tool_config = self._map_tool_config(model_request_parameters) + tool_config = self._map_tool_config(model_request_parameters, settings) if 
tool_config: params['toolConfig'] = tool_config @@ -481,11 +500,18 @@ def _map_inference_config( return inference_config - def _map_tool_config(self, model_request_parameters: ModelRequestParameters) -> ToolConfigurationTypeDef | None: + def _map_tool_config( + self, + model_request_parameters: ModelRequestParameters, + model_settings: BedrockModelSettings | None, + ) -> ToolConfigurationTypeDef | None: tools = self._get_tools(model_request_parameters) if not tools: return None + if model_settings and model_settings.get('bedrock_cache_tool_definitions'): + tools.append({'cachePoint': {'type': 'default'}}) + tool_choice: ToolChoiceTypeDef if not model_request_parameters.allow_text_output: tool_choice = {'any': {}} @@ -499,12 +525,16 @@ def _map_tool_config(self, model_request_parameters: ModelRequestParameters) -> return tool_config async def _map_messages( # noqa: C901 - self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters + self, + messages: list[ModelMessage], + model_request_parameters: ModelRequestParameters, + model_settings: BedrockModelSettings | None, ) -> tuple[list[SystemContentBlockTypeDef], list[MessageUnionTypeDef]]: """Maps a `pydantic_ai.Message` to the Bedrock `MessageUnionTypeDef`. Groups consecutive ToolReturnPart objects into a single user message as required by Bedrock Claude/Nova models. 
""" + settings = model_settings or BedrockModelSettings() profile = BedrockModelProfile.from_profile(self.profile) system_prompt: list[SystemContentBlockTypeDef] = [] bedrock_messages: list[MessageUnionTypeDef] = [] @@ -613,10 +643,13 @@ async def _map_messages( # noqa: C901 if instructions := self._get_instructions(messages, model_request_parameters): system_prompt.insert(0, {'text': instructions}) + if system_prompt and settings.get('bedrock_cache_instructions'): + system_prompt.append({'cachePoint': {'type': 'default'}}) + return system_prompt, processed_messages @staticmethod - async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int]) -> list[MessageUnionTypeDef]: + async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int]) -> list[MessageUnionTypeDef]: # noqa: C901 content: list[ContentBlockUnionTypeDef] = [] if isinstance(part.content, str): content.append({'text': part.content}) @@ -674,8 +707,17 @@ async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int]) elif isinstance(item, AudioUrl): # pragma: no cover raise NotImplementedError('Audio is not supported yet.') elif isinstance(item, CachePoint): - # Bedrock support has not been implemented yet: https://github.com/pydantic/pydantic-ai/issues/3418 - pass + if not content or 'cachePoint' in content[-1]: + raise UserError( + 'CachePoint cannot be the first content in a user message - there must be previous content to cache when using Bedrock. ' + 'To cache system instructions or tool definitions, use the `bedrock_cache_instructions` or `bedrock_cache_tool_definitions` settings instead.' + ) + if 'text' not in content[-1]: + # AWS currently rejects cache points that directly follow non-text content. + # Insert an empty text block as a workaround (see https://github.com/pydantic/pydantic-ai/issues/3418 + # and https://github.com/pydantic/pydantic-ai/pull/2560#discussion_r2349209916). 
+ content.append({'text': '\n'}) + content.append({'cachePoint': {'type': 'default'}}) else: assert_never(item) return [{'role': 'user', 'content': content}] @@ -796,6 +838,8 @@ def _map_usage(self, metadata: ConverseStreamMetadataEventTypeDef) -> usage.Requ return usage.RequestUsage( input_tokens=metadata['usage']['inputTokens'], output_tokens=metadata['usage']['outputTokens'], + cache_read_tokens=metadata['usage'].get('cacheReadInputTokens', 0), + cache_write_tokens=metadata['usage'].get('cacheWriteInputTokens', 0), ) diff --git a/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control.yaml b/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control.yaml new file mode 100644 index 0000000000..4bd60afe27 --- /dev/null +++ b/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control.yaml @@ -0,0 +1,103 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": [{"text": "long promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong 
promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + 
promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong 
promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong + prompt"}, {"cachePoint": {"type": "default"}}, {"text": "Response only number What is 2 + 3"}]}], "system": [], "inferenceConfig": + {}}' + headers: + amz-sdk-invocation-id: + - !!binary | + MWQ3YjUzZDItNTI1NS00NDJhLWE5ZjAtZDM0YTMzMzcxOTI5 + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + 
content-length: + - '6781' + content-type: + - !!binary | + YXBwbGljYXRpb24vanNvbg== + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-sonnet-4-5-20250929-v1%3A0/converse + response: + headers: + connection: + - keep-alive + content-length: + - '321' + content-type: + - application/json + parsed_body: + metrics: + latencyMs: 1867 + output: + message: + content: + - text: '5' + role: assistant + stopReason: end_turn + usage: + cacheReadInputTokenCount: 0 + cacheReadInputTokens: 0 + cacheWriteInputTokenCount: 1203 + cacheWriteInputTokens: 1203 + inputTokens: 15 + outputTokens: 5 + serverToolUsage: {} + totalTokens: 1223 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_bedrock/test_bedrock_cache_write_and_read.yaml b/tests/models/cassettes/test_bedrock/test_bedrock_cache_write_and_read.yaml new file mode 100644 index 0000000000..c1120f0e55 --- /dev/null +++ b/tests/models/cassettes/test_bedrock/test_bedrock_cache_write_and_read.yaml @@ -0,0 +1,148 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": [{"text": "Newer response with something except single number\nNewer + response with something except single number\nNewer response with something except single number\nNewer response with + something except single number\nNewer response with something except single number\nNewer response with something except + single number\nNewer response with something except single number\nNewer response with something except single number\nNewer + response with something except single number\nNewer response with something except single number\n"}, {"cachePoint": + {"type": "default"}}, {"document": {"name": "Document 1", "format": "txt", "source": {"bytes": "WW91IGFyZSBhIGdyZWF0IG1hdGhlbWF0aWNpYW4="}}}, + {"text": "\n"}, {"cachePoint": {"type": "default"}}, {"text": "What is 10 + 11?"}]}], "system": [{"text": "YOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE 
ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU 
MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\n"}, {"cachePoint": {"type": + "default"}}], "inferenceConfig": {}, "toolConfig": {"tools": [{"toolSpec": {"name": "catalog_lookup", "inputSchema": + {"json": {"additionalProperties": false, "properties": {}, "type": "object"}}}}, {"toolSpec": {"name": "diagnostics", + "inputSchema": {"json": {"additionalProperties": false, "properties": {}, "type": "object"}}}}, {"cachePoint": {"type": + "default"}}], "toolChoice": {"auto": {}}}}' + headers: + amz-sdk-invocation-id: + - !!binary | + MTNhZTU1NTctNmMwNC00Y2Q3LTg5YzQtZGJiMmQ4OWQ2NzBl + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + content-length: + - '3416' + content-type: + - !!binary | + YXBwbGljYXRpb24vanNvbg== + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-sonnet-4-5-20250929-v1%3A0/converse + response: + headers: + connection: + - keep-alive + content-length: + - '322' + content-type: + - application/json + parsed_body: + metrics: + latencyMs: 2053 + output: + message: + content: + - text: '21' + role: assistant + stopReason: end_turn + usage: + cacheReadInputTokenCount: 0 + cacheReadInputTokens: 0 + cacheWriteInputTokenCount: 1313 + cacheWriteInputTokens: 1313 + inputTokens: 11 + outputTokens: 5 + serverToolUsage: {} + totalTokens: 1329 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": [{"text": "Newer response with something except single number\nNewer + response with something except single number\nNewer response with something except single number\nNewer response with + something except single number\nNewer response with something except single number\nNewer response with something except + single number\nNewer response with something except single number\nNewer response with something except single number\nNewer + response with something except single number\nNewer response with 
something except single number\n"}, {"cachePoint": + {"type": "default"}}, {"document": {"name": "Document 1", "format": "txt", "source": {"bytes": "WW91IGFyZSBhIGdyZWF0IG1hdGhlbWF0aWNpYW4="}}}, + {"text": "\n"}, {"cachePoint": {"type": "default"}}, {"text": "What is 10 + 11?"}]}], "system": [{"text": "YOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH 
SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\n"}, {"cachePoint": {"type": + "default"}}], "inferenceConfig": {}, "toolConfig": {"tools": [{"toolSpec": {"name": "catalog_lookup", "inputSchema": + {"json": {"additionalProperties": false, "properties": {}, "type": "object"}}}}, {"toolSpec": {"name": "diagnostics", + "inputSchema": {"json": {"additionalProperties": false, "properties": {}, "type": "object"}}}}, {"cachePoint": {"type": + "default"}}], "toolChoice": {"auto": {}}}}' + headers: + amz-sdk-invocation-id: + - !!binary | + MTVjNDBlYzYtZjE1OC00MWRkLWJjNzItYWJkZjExZWVmZTBk + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + content-length: + - '3416' + content-type: + - !!binary | + YXBwbGljYXRpb24vanNvbg== + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-sonnet-4-5-20250929-v1%3A0/converse + response: + headers: + connection: + - keep-alive + content-length: + - '322' + content-type: + - application/json + parsed_body: + metrics: + latencyMs: 2030 + output: + message: + content: + - text: '21' + role: assistant + stopReason: end_turn + usage: + cacheReadInputTokenCount: 1313 + cacheReadInputTokens: 1313 + cacheWriteInputTokenCount: 0 + cacheWriteInputTokens: 0 + inputTokens: 11 + outputTokens: 5 + serverToolUsage: {} + totalTokens: 1329 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_bedrock/test_bedrock_model_usage_limit_exceeded.yaml b/tests/models/cassettes/test_bedrock/test_bedrock_model_usage_limit_exceeded.yaml index 
92cefc9519..09e42ffceb 100644 --- a/tests/models/cassettes/test_bedrock/test_bedrock_model_usage_limit_exceeded.yaml +++ b/tests/models/cassettes/test_bedrock/test_bedrock_model_usage_limit_exceeded.yaml @@ -1,16 +1,16 @@ interactions: - request: body: '{"input": {"converse": {"messages": [{"role": "user", "content": [{"text": "The quick brown fox jumps over the - lazydog."}]}], "system": []}}}' + lazydog."}, {"cachePoint": {"type": "default"}}, {"text": "What was next?"}]}], "system": []}}}' headers: amz-sdk-invocation-id: - !!binary | - ZDYxNmVkOTktYzgwMi00MDE0LTljZGUtYWFjMjk5N2I2MDFj + Y2FhYzlhNjAtNWYyOC00NjhmLWJkZTgtODM0OTdhMjc1ZTI3 amz-sdk-request: - !!binary | YXR0ZW1wdD0x content-length: - - '141' + - '206' content-type: - !!binary | YXBwbGljYXRpb24vanNvbg== @@ -25,7 +25,7 @@ interactions: content-type: - application/json parsed_body: - inputTokens: 19 + inputTokens: 23 status: code: 200 message: OK diff --git a/tests/models/test_bedrock.py b/tests/models/test_bedrock.py index cce18a9227..e36852e9e0 100644 --- a/tests/models/test_bedrock.py +++ b/tests/models/test_bedrock.py @@ -15,6 +15,7 @@ FunctionToolCallEvent, FunctionToolResultEvent, ImageUrl, + ModelMessage, ModelRequest, ModelResponse, PartDeltaEvent, @@ -33,13 +34,9 @@ VideoUrl, ) from pydantic_ai.agent import Agent -from pydantic_ai.exceptions import ModelHTTPError, ModelRetry, UsageLimitExceeded +from pydantic_ai.exceptions import ModelHTTPError, ModelRetry, UsageLimitExceeded, UserError from pydantic_ai.messages import AgentStreamEvent from pydantic_ai.models import ModelRequestParameters -from pydantic_ai.models.bedrock import BedrockConverseModel, BedrockModelSettings -from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings -from pydantic_ai.providers.bedrock import BedrockProvider -from pydantic_ai.providers.openai import OpenAIProvider from pydantic_ai.run import AgentRunResult, AgentRunResultEvent from pydantic_ai.tools import ToolDefinition from 
pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits @@ -47,7 +44,10 @@ from ..conftest import IsDatetime, IsInstance, IsStr, try_import with try_import() as imports_successful: - pass + from pydantic_ai.models.bedrock import BedrockConverseModel, BedrockModelSettings + from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings + from pydantic_ai.providers.bedrock import BedrockProvider + from pydantic_ai.providers.openai import OpenAIProvider pytestmark = [ pytest.mark.skipif(not imports_successful(), reason='bedrock not installed'), @@ -99,6 +99,7 @@ async def test_bedrock_model(allow_model_requests: None, bedrock_provider: Bedro ) +@pytest.mark.vcr() async def test_bedrock_model_usage_limit_exceeded( allow_model_requests: None, bedrock_provider: BedrockProvider, @@ -108,14 +109,15 @@ async def test_bedrock_model_usage_limit_exceeded( with pytest.raises( UsageLimitExceeded, - match='The next request would exceed the input_tokens_limit of 18 \\(input_tokens=19\\)', + match='The next request would exceed the input_tokens_limit of 18 \\(input_tokens=23\\)', ): await agent.run( - 'The quick brown fox jumps over the lazydog.', + ['The quick brown fox jumps over the lazydog.', CachePoint(), 'What was next?'], usage_limits=UsageLimits(input_tokens_limit=18, count_tokens_before_request=True), ) +@pytest.mark.vcr() async def test_bedrock_model_usage_limit_not_exceeded( allow_model_requests: None, bedrock_provider: BedrockProvider, @@ -1224,7 +1226,7 @@ async def test_bedrock_group_consecutive_tool_return_parts(bedrock_provider: Bed ] # Call the mapping function directly - _, bedrock_messages = await model._map_messages(req, ModelRequestParameters()) # type: ignore[reportPrivateUsage] + _, bedrock_messages = await model._map_messages(req, ModelRequestParameters(), BedrockModelSettings()) # type: ignore[reportPrivateUsage] assert bedrock_messages == snapshot( [ @@ -1345,7 +1347,7 @@ async def 
test_bedrock_mistral_tool_result_format(bedrock_provider: BedrockProvi # Models other than Mistral support toolResult.content with text, not json model = BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider) # Call the mapping function directly - _, bedrock_messages = await model._map_messages(req, ModelRequestParameters()) # type: ignore[reportPrivateUsage] + _, bedrock_messages = await model._map_messages(req, ModelRequestParameters(), BedrockModelSettings()) # type: ignore[reportPrivateUsage] assert bedrock_messages == snapshot( [ @@ -1361,7 +1363,7 @@ async def test_bedrock_mistral_tool_result_format(bedrock_provider: BedrockProvi # Mistral requires toolResult.content to hold json, not text model = BedrockConverseModel('mistral.mistral-7b-instruct-v0:2', provider=bedrock_provider) # Call the mapping function directly - _, bedrock_messages = await model._map_messages(req, ModelRequestParameters()) # type: ignore[reportPrivateUsage] + _, bedrock_messages = await model._map_messages(req, ModelRequestParameters(), BedrockModelSettings()) # type: ignore[reportPrivateUsage] assert bedrock_messages == snapshot( [ @@ -1385,7 +1387,7 @@ async def test_bedrock_no_tool_choice(bedrock_provider: BedrockProvider): # Amazon Nova supports tool_choice model = BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider) - tool_config = model._map_tool_config(mrp) # type: ignore[reportPrivateUsage] + tool_config = model._map_tool_config(mrp, BedrockModelSettings()) # type: ignore[reportPrivateUsage] assert tool_config == snapshot( { @@ -1406,7 +1408,7 @@ async def test_bedrock_no_tool_choice(bedrock_provider: BedrockProvider): # Anthropic supports tool_choice model = BedrockConverseModel('us.anthropic.claude-3-7-sonnet-20250219-v1:0', provider=bedrock_provider) - tool_config = model._map_tool_config(mrp) # type: ignore[reportPrivateUsage] + tool_config = model._map_tool_config(mrp, BedrockModelSettings()) # type: ignore[reportPrivateUsage] 
assert tool_config == snapshot( { @@ -1427,7 +1429,7 @@ async def test_bedrock_no_tool_choice(bedrock_provider: BedrockProvider): # Other models don't support tool_choice model = BedrockConverseModel('us.meta.llama4-maverick-17b-instruct-v1:0', provider=bedrock_provider) - tool_config = model._map_tool_config(mrp) # type: ignore[reportPrivateUsage] + tool_config = model._map_tool_config(mrp, BedrockModelSettings()) # type: ignore[reportPrivateUsage] assert tool_config == snapshot( { @@ -1515,12 +1517,149 @@ async def test_bedrock_streaming_error(allow_model_requests: None, bedrock_provi assert exc_info.value.body.get('Error', {}).get('Message') == 'The provided model identifier is invalid.' # type: ignore[union-attr] -async def test_cache_point_filtering(): - """Test that CachePoint is filtered out in Bedrock message mapping.""" - from itertools import count +@pytest.mark.vcr() +async def test_bedrock_cache_point_adds_cache_control(allow_model_requests: None, bedrock_provider: BedrockProvider): + """Record a real Bedrock call to confirm cache points reach AWS (requires ~1k tokens).""" + model = BedrockConverseModel('us.anthropic.claude-sonnet-4-5-20250929-v1:0', provider=bedrock_provider) + agent = Agent(model=model) + long_context = 'long prompt' * 600 # More tokens to activate a cache + + result = await agent.run([long_context, CachePoint(), 'Response only number What is 2 + 3']) + assert result.output == snapshot('5') + assert result.usage() == snapshot(RunUsage(input_tokens=15, output_tokens=5, cache_write_tokens=1203, requests=1)) + + +@pytest.mark.vcr() +async def test_bedrock_cache_write_and_read(allow_model_requests: None, bedrock_provider: BedrockProvider): + """Integration test covering tool and instruction caching using a recorded cassette.""" + model = BedrockConverseModel('us.anthropic.claude-sonnet-4-5-20250929-v1:0', provider=bedrock_provider) + system_prompt = 'YOU MUST RESPONSE ONLY WITH SINGLE NUMBER\n' * 50 # More tokens to activate a cache + 
agent = Agent( + model, + system_prompt=system_prompt, + model_settings=BedrockModelSettings( + bedrock_cache_tool_definitions=True, + bedrock_cache_instructions=True, + ), + ) + + @agent.tool_plain + def catalog_lookup() -> str: # pragma: no cover - exercised via agent call + return 'catalog-ok' + + @agent.tool_plain + def diagnostics() -> str: # pragma: no cover - exercised via agent call + return 'diagnostics-ok' - # Test the static method directly - messages = await BedrockConverseModel._map_user_prompt(UserPromptPart(content=['text', CachePoint()]), count()) # pyright: ignore[reportPrivateUsage] - # CachePoint should be filtered out, message should still be valid - assert len(messages) == 1 - assert messages[0]['role'] == 'user' + long_context = 'Newer response with something except single number\n' * 10 + document = BinaryContent(data=b'You are a great mathematician', media_type='text/plain') + run_args = [long_context, CachePoint(), document, CachePoint(), 'What is 10 + 11?'] + + first = await agent.run(run_args) + assert first.output == snapshot('21') + first_usage = first.usage() + assert first_usage == snapshot(RunUsage(input_tokens=11, output_tokens=5, cache_write_tokens=1313, requests=1)) + + second = await agent.run(run_args) + assert second.output == snapshot('21') + second_usage = second.usage() + assert second_usage == snapshot(RunUsage(input_tokens=11, output_tokens=5, cache_read_tokens=1313, requests=1)) + + +async def test_bedrock_cache_point_as_first_content_raises_error( + allow_model_requests: None, bedrock_provider: BedrockProvider +): + """CachePoint should raise a UserError if it appears before any other content.""" + model = BedrockConverseModel('anthropic.claude-3-7-sonnet-20250219-v1:0', provider=bedrock_provider) + messages: list[ModelMessage] = [ModelRequest(parts=[UserPromptPart(content=[CachePoint(), 'This should fail'])])] + with pytest.raises(UserError, match='CachePoint cannot be the first content in a user message'): + await 
model._map_messages(messages, ModelRequestParameters(), BedrockModelSettings()) # pyright: ignore[reportPrivateUsage] + + +# Bedrock currently errors if a cache point immediately follows non-text content, so we inject a newline block. +async def test_bedrock_cache_point_after_binary_content_workaround( + allow_model_requests: None, bedrock_provider: BedrockProvider +): + model = BedrockConverseModel('us.anthropic.claude-3-5-sonnet-20240620-v1:0', provider=bedrock_provider) + messages: list[ModelMessage] = [ + ModelRequest( + parts=[ + UserPromptPart( + content=[ + 'Process the attached text file. Return the answer only.', + BinaryContent(data=b'What is 2+2? Provide the answer only.', media_type='text/plain'), + CachePoint(), + ] + ) + ] + ) + ] + _, bedrock_messages = await model._map_messages(messages, ModelRequestParameters(), BedrockModelSettings()) # pyright: ignore[reportPrivateUsage] + assert bedrock_messages[0]['content'] == snapshot( + [ + {'text': 'Process the attached text file. Return the answer only.'}, + { + 'document': { + 'name': 'Document 1', + 'format': 'txt', + 'source': {'bytes': b'What is 2+2? 
Provide the answer only.'}, + } + }, + {'text': '\n'}, # Empty line after BinaryContent as temp workaround unless bedrock will fix the bug + {'cachePoint': {'type': 'default'}}, + ] + ) + + +async def test_bedrock_cache_point_multiple_markers(allow_model_requests: None, bedrock_provider: BedrockProvider): + model = BedrockConverseModel('us.anthropic.claude-3-5-haiku-20241022-v1:0', provider=bedrock_provider) + messages: list[ModelMessage] = [ + ModelRequest( + parts=[UserPromptPart(content=['First chunk', CachePoint(), 'Second chunk', CachePoint(), 'Question'])] + ) + ] + _, bedrock_messages = await model._map_messages(messages, ModelRequestParameters(), BedrockModelSettings()) # pyright: ignore[reportPrivateUsage] + assert bedrock_messages[0]['content'] == snapshot( + [ + {'text': 'First chunk'}, + {'cachePoint': {'type': 'default'}}, + {'text': 'Second chunk'}, + {'cachePoint': {'type': 'default'}}, + {'text': 'Question'}, + ] + ) + + +async def test_bedrock_cache_tool_definitions(allow_model_requests: None, bedrock_provider: BedrockProvider): + model = BedrockConverseModel('anthropic.claude-3-5-sonnet-20241022-v2:0', provider=bedrock_provider) + params = ModelRequestParameters( + function_tools=[ + ToolDefinition(name='tool_one'), + ToolDefinition(name='tool_two'), + ] + ) + params = model.customize_request_parameters(params) + tool_config = model._map_tool_config( # pyright: ignore[reportPrivateUsage] + params, + BedrockModelSettings(bedrock_cache_tool_definitions=True), + ) + assert tool_config and len(tool_config['tools']) == 3 + assert tool_config['tools'][-1] == {'cachePoint': {'type': 'default'}} + + +async def test_bedrock_cache_instructions(allow_model_requests: None, bedrock_provider: BedrockProvider): + model = BedrockConverseModel('us.anthropic.claude-3-5-sonnet-20240620-v1:0', provider=bedrock_provider) + messages: list[ModelMessage] = [ + ModelRequest(parts=[SystemPromptPart(content='System instructions to cache.'), UserPromptPart(content='Hi!')]) + 
] + system_prompt, _ = await model._map_messages( # pyright: ignore[reportPrivateUsage] + messages, + ModelRequestParameters(), + BedrockModelSettings(bedrock_cache_instructions=True), + ) + assert system_prompt == snapshot( + [ + {'text': 'System instructions to cache.'}, + {'cachePoint': {'type': 'default'}}, + ] + ) From 59c6868225b3d36045f6cc7c540b8a5a221240e8 Mon Sep 17 00:00:00 2001 From: Denys Moskalenko Date: Sat, 15 Nov 2025 17:12:05 +0100 Subject: [PATCH 2/3] Skip documentation examples from tests --- docs/models/bedrock.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/models/bedrock.md b/docs/models/bedrock.md index 667ca487d4..9cd42bcdc3 100644 --- a/docs/models/bedrock.md +++ b/docs/models/bedrock.md @@ -87,7 +87,7 @@ Bedrock supports [prompt caching](https://docs.aws.amazon.com/bedrock/latest/use You can combine all of them: -```python +```python {test="skip"} from pydantic_ai import Agent, CachePoint, RunContext from pydantic_ai.models.bedrock import BedrockConverseModel, BedrockModelSettings @@ -128,7 +128,7 @@ async def main(): Access cache usage statistics via [`RequestUsage`][pydantic_ai.usage.RequestUsage]: -```python +```python {test="skip"} from pydantic_ai import Agent, CachePoint agent = Agent('bedrock:us.anthropic.claude-sonnet-4-5-20250929-v1:0') From 900d5423e6393f8d81f6721d65b0f199b24a815b Mon Sep 17 00:00:00 2001 From: Denys Moskalenko Date: Wed, 19 Nov 2025 11:59:07 +0100 Subject: [PATCH 3/3] Parametrize `test_bedrock_cache_point_adds_cache_control` to support multiple models --- ...edrock_cache_point_adds_cache_control.yaml | 103 ------------------ ...adds_cache_control[claude-sonnet-4-5].yaml | 91 ++++++++++++++++ ...e_point_adds_cache_control[nova-lite].yaml | 91 ++++++++++++++++ tests/models/test_bedrock.py | 34 ++++-- 4 files changed, 206 insertions(+), 113 deletions(-) delete mode 100644 tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control.yaml create mode 100644 
tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control[claude-sonnet-4-5].yaml create mode 100644 tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control[nova-lite].yaml diff --git a/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control.yaml b/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control.yaml deleted file mode 100644 index 4bd60afe27..0000000000 --- a/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control.yaml +++ /dev/null @@ -1,103 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": [{"text": "long promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong 
promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong 
promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong 
promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong promptlong - prompt"}, {"cachePoint": {"type": "default"}}, {"text": "Response only number What is 2 + 3"}]}], "system": [], "inferenceConfig": - {}}' - headers: - amz-sdk-invocation-id: - - !!binary | - MWQ3YjUzZDItNTI1NS00NDJhLWE5ZjAtZDM0YTMzMzcxOTI5 - amz-sdk-request: - - !!binary | - YXR0ZW1wdD0x - content-length: - - '6781' - content-type: - - !!binary | - YXBwbGljYXRpb24vanNvbg== - method: POST - uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-sonnet-4-5-20250929-v1%3A0/converse - response: - headers: - connection: - - keep-alive - content-length: - - '321' - 
content-type: - - application/json - parsed_body: - metrics: - latencyMs: 1867 - output: - message: - content: - - text: '5' - role: assistant - stopReason: end_turn - usage: - cacheReadInputTokenCount: 0 - cacheReadInputTokens: 0 - cacheWriteInputTokenCount: 1203 - cacheWriteInputTokens: 1203 - inputTokens: 15 - outputTokens: 5 - serverToolUsage: {} - totalTokens: 1223 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control[claude-sonnet-4-5].yaml b/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control[claude-sonnet-4-5].yaml new file mode 100644 index 0000000000..519eebbf3d --- /dev/null +++ b/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control[claude-sonnet-4-5].yaml @@ -0,0 +1,91 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": [{"text": "ONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN 
RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + 
SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\n"}, {"cachePoint": {"type": "default"}}, {"text": "Response + only number What is 2 + 3"}]}], "system": [{"text": "YOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH 
SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\n"}, {"cachePoint": {"type": "default"}}], "inferenceConfig": {}}' + headers: + amz-sdk-invocation-id: + - !!binary | + Y2RmYWJiOGYtYjM0MC00NzY4LTgwZTEtMDI5NzZiZDdiZjVm + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + content-length: + - '5580' + content-type: + - !!binary | + YXBwbGljYXRpb24vanNvbg== + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-sonnet-4-5-20250929-v1%3A0/converse + response: + headers: + connection: + - keep-alive + content-length: + - '321' + content-type: + - application/json + parsed_body: + metrics: + latencyMs: 2015 + output: + message: + content: + - text: '5' + role: assistant + stopReason: end_turn + usage: + cacheReadInputTokenCount: 0 + cacheReadInputTokens: 0 + cacheWriteInputTokenCount: 1503 + cacheWriteInputTokens: 1503 + inputTokens: 14 + outputTokens: 5 + serverToolUsage: {} + totalTokens: 1522 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control[nova-lite].yaml b/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control[nova-lite].yaml new file mode 100644 index 0000000000..8cfeda778e --- /dev/null +++ b/tests/models/cassettes/test_bedrock/test_bedrock_cache_point_adds_cache_control[nova-lite].yaml @@ -0,0 +1,91 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": [{"text": "ONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER 
IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY 
SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\nONLY + SINGLE NUMBER IN RESPONSE\nONLY SINGLE NUMBER IN RESPONSE\n"}, {"cachePoint": {"type": "default"}}, {"text": "Response + only number What is 2 + 3"}]}], "system": [{"text": "YOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE 
ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU + MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE + NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY + WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE + ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST RESPONSE ONLY WITH SINGLE NUMBER\nYOU MUST + RESPONSE ONLY WITH SINGLE NUMBER\n"}, {"cachePoint": {"type": "default"}}], "inferenceConfig": {}}' + headers: + amz-sdk-invocation-id: + - !!binary | + MWRhOGY1NGItNThhMC00YTc3LTg0YjYtNzIyZGEwOGVmMjI0 + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + content-length: + - '5580' + content-type: + - !!binary | + YXBwbGljYXRpb24vanNvbg== + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-lite-v1%3A0/converse + response: + headers: + connection: + - keep-alive + content-length: + - '320' + content-type: + - 
application/json + parsed_body: + metrics: + latencyMs: 797 + output: + message: + content: + - text: '5' + role: assistant + stopReason: end_turn + usage: + cacheReadInputTokenCount: 0 + cacheReadInputTokens: 0 + cacheWriteInputTokenCount: 1298 + cacheWriteInputTokens: 1298 + inputTokens: 10 + outputTokens: 2 + serverToolUsage: {} + totalTokens: 1310 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/test_bedrock.py b/tests/models/test_bedrock.py index e36852e9e0..71f2ee6abc 100644 --- a/tests/models/test_bedrock.py +++ b/tests/models/test_bedrock.py @@ -44,7 +44,7 @@ from ..conftest import IsDatetime, IsInstance, IsStr, try_import with try_import() as imports_successful: - from pydantic_ai.models.bedrock import BedrockConverseModel, BedrockModelSettings + from pydantic_ai.models.bedrock import BedrockConverseModel, BedrockModelName, BedrockModelSettings from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings from pydantic_ai.providers.bedrock import BedrockProvider from pydantic_ai.providers.openai import OpenAIProvider @@ -1518,25 +1518,39 @@ async def test_bedrock_streaming_error(allow_model_requests: None, bedrock_provi @pytest.mark.vcr() -async def test_bedrock_cache_point_adds_cache_control(allow_model_requests: None, bedrock_provider: BedrockProvider): +@pytest.mark.parametrize( + 'model_name', + [ + pytest.param('us.anthropic.claude-sonnet-4-5-20250929-v1:0', id='claude-sonnet-4-5'), + pytest.param('us.amazon.nova-lite-v1:0', id='nova-lite'), + ], +) +async def test_bedrock_cache_point_adds_cache_control( + allow_model_requests: None, bedrock_provider: BedrockProvider, model_name: BedrockModelName +): """Record a real Bedrock call to confirm cache points reach AWS (requires ~1k tokens).""" - model = BedrockConverseModel('us.anthropic.claude-sonnet-4-5-20250929-v1:0', provider=bedrock_provider) - agent = Agent(model=model) - long_context = 'long prompt' * 600 # More tokens to activate a cache + model = 
BedrockConverseModel(model_name, provider=bedrock_provider) + agent = Agent( + model, + system_prompt='YOU MUST RESPONSE ONLY WITH SINGLE NUMBER\n' * 50, # More tokens to activate a cache + model_settings=BedrockModelSettings(bedrock_cache_instructions=True), + ) + long_context = 'ONLY SINGLE NUMBER IN RESPONSE\n' * 100 # More tokens to activate a cache result = await agent.run([long_context, CachePoint(), 'Response only number What is 2 + 3']) assert result.output == snapshot('5') - assert result.usage() == snapshot(RunUsage(input_tokens=15, output_tokens=5, cache_write_tokens=1203, requests=1)) + # Different tokens usage depending on a model + assert result.usage().cache_write_tokens >= 1000 + assert result.usage().input_tokens <= 20 @pytest.mark.vcr() async def test_bedrock_cache_write_and_read(allow_model_requests: None, bedrock_provider: BedrockProvider): """Integration test covering tool and instruction caching using a recorded cassette.""" model = BedrockConverseModel('us.anthropic.claude-sonnet-4-5-20250929-v1:0', provider=bedrock_provider) - system_prompt = 'YOU MUST RESPONSE ONLY WITH SINGLE NUMBER\n' * 50 # More tokens to activate a cache agent = Agent( model, - system_prompt=system_prompt, + system_prompt='YOU MUST RESPONSE ONLY WITH SINGLE NUMBER\n' * 50, # More tokens to activate a cache model_settings=BedrockModelSettings( bedrock_cache_tool_definitions=True, bedrock_cache_instructions=True, @@ -1580,7 +1594,7 @@ async def test_bedrock_cache_point_as_first_content_raises_error( async def test_bedrock_cache_point_after_binary_content_workaround( allow_model_requests: None, bedrock_provider: BedrockProvider ): - model = BedrockConverseModel('us.anthropic.claude-3-5-sonnet-20240620-v1:0', provider=bedrock_provider) + model = BedrockConverseModel('us.anthropic.claude-haiku-4-5-20251001-v1:0', provider=bedrock_provider) messages: list[ModelMessage] = [ ModelRequest( parts=[ @@ -1631,7 +1645,7 @@ async def 
test_bedrock_cache_point_multiple_markers(allow_model_requests: None, async def test_bedrock_cache_tool_definitions(allow_model_requests: None, bedrock_provider: BedrockProvider): - model = BedrockConverseModel('anthropic.claude-3-5-sonnet-20241022-v2:0', provider=bedrock_provider) + model = BedrockConverseModel('anthropic.claude-sonnet-4-20250514-v1:0', provider=bedrock_provider) params = ModelRequestParameters( function_tools=[ ToolDefinition(name='tool_one'),