feat: add Bedrock count_tokens support plus usage-limit coverage

DenysMoskalenko · DenysMoskalenko · commit 874be88ef58e · 2025-11-07T12:55:29.000+01:00
- Wire BedrockConverseModel.count_tokens to the Bedrock Runtime count_tokens API and reuse the converse payload builder for both count and inference calls.
  - Update pytest cassettes + dependency floor so Bedrock token preflight can run with real responses, and add a CLI helper for capturing new recordings.
  - Add usage-limit tests (with fresh VCR data) and a small unit test for _remove_inference_geo_prefix to keep the behavior covered once the new count flow is exercised.
diff --git a/pydantic_ai_slim/pydantic_ai/models/__init__.py b/pydantic_ai_slim/pydantic_ai/models/__init__.py
@@ -103,6 +103,13 @@
         'bedrock:us.anthropic.claude-opus-4-20250514-v1:0',
         'bedrock:anthropic.claude-sonnet-4-20250514-v1:0',
         'bedrock:us.anthropic.claude-sonnet-4-20250514-v1:0',
+        'bedrock:eu.anthropic.claude-sonnet-4-20250514-v1:0',
+        'bedrock:anthropic.claude-sonnet-4-5-20250929-v1:0',
+        'bedrock:us.anthropic.claude-sonnet-4-5-20250929-v1:0',
+        'bedrock:eu.anthropic.claude-sonnet-4-5-20250929-v1:0',
+        'bedrock:anthropic.claude-haiku-4-5-20251001-v1:0',
+        'bedrock:us.anthropic.claude-haiku-4-5-20251001-v1:0',
+        'bedrock:eu.anthropic.claude-haiku-4-5-20251001-v1:0',
         'bedrock:cohere.command-text-v14',
         'bedrock:cohere.command-r-v1:0',
         'bedrock:cohere.command-r-plus-v1:0',
diff --git a/pydantic_ai_slim/pydantic_ai/models/bedrock.py b/pydantic_ai_slim/pydantic_ai/models/bedrock.py
@@ -59,6 +59,7 @@
         ConverseStreamMetadataEventTypeDef,
         ConverseStreamOutputTypeDef,
         ConverseStreamResponseTypeDef,
+        CountTokensRequestTypeDef,
         DocumentBlockTypeDef,
         GuardrailConfigurationTypeDef,
         ImageBlockTypeDef,
@@ -75,7 +76,6 @@
         VideoBlockTypeDef,
     )
 
-
 LatestBedrockModelNames = Literal[
     'amazon.titan-tg1-large',
     'amazon.titan-text-lite-v1',
@@ -104,6 +104,13 @@
     'us.anthropic.claude-opus-4-20250514-v1:0',
     'anthropic.claude-sonnet-4-20250514-v1:0',
     'us.anthropic.claude-sonnet-4-20250514-v1:0',
+    'eu.anthropic.claude-sonnet-4-20250514-v1:0',
+    'anthropic.claude-sonnet-4-5-20250929-v1:0',
+    'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
+    'eu.anthropic.claude-sonnet-4-5-20250929-v1:0',
+    'anthropic.claude-haiku-4-5-20251001-v1:0',
+    'us.anthropic.claude-haiku-4-5-20251001-v1:0',
+    'eu.anthropic.claude-haiku-4-5-20251001-v1:0',
     'cohere.command-text-v14',
     'cohere.command-r-v1:0',
     'cohere.command-r-plus-v1:0',
@@ -134,7 +141,6 @@
 See [the Bedrock docs](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for a full list.
 """
 
-
 P = ParamSpec('P')
 T = typing.TypeVar('T')
 
@@ -147,6 +153,13 @@
     'tool_use': 'tool_call',
 }
 
+_AWS_BEDROCK_INFERENCE_GEO_PREFIXES: tuple[str, ...] = ('us.', 'eu.', 'apac.', 'jp.', 'au.', 'ca.')
+"""Geo prefixes for Bedrock inference profile IDs (e.g., 'eu.', 'us.').
+
+Used to strip the geo prefix so we can pass a pure foundation model ID/ARN to CountTokens,
+which does not accept profile IDs. Extend if new geos appear (e.g., 'global.', 'us-gov.').
+"""
+
 
 class BedrockModelSettings(ModelSettings, total=False):
     """Settings for Bedrock models.
@@ -273,6 +286,30 @@ async def request(
         model_response = await self._process_response(response)
         return model_response
 
+    async def count_tokens(
+        self,
+        messages: list[ModelMessage],
+        model_settings: ModelSettings | None,
+        model_request_parameters: ModelRequestParameters,
+    ) -> usage.RequestUsage:
+        """Count the number of tokens, works with limited models.
+
+        Check the actual supported models on <https://docs.aws.amazon.com/bedrock/latest/userguide/count-tokens.html>
+        """
+        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)
+        system_prompt, bedrock_messages = await self._map_messages(messages, model_request_parameters)
+        params: CountTokensRequestTypeDef = {
+            'modelId': self._remove_inference_geo_prefix(self.model_name),
+            'input': {
+                'converse': {
+                    'messages': bedrock_messages,
+                    'system': system_prompt,
+                },
+            },
+        }
+        response = await anyio.to_thread.run_sync(functools.partial(self.client.count_tokens, **params))
+        return usage.RequestUsage(input_tokens=response['inputTokens'])
+
     @asynccontextmanager
     async def request_stream(
         self,
@@ -634,6 +671,14 @@ def _map_tool_call(t: ToolCallPart) -> ContentBlockOutputTypeDef:
             'toolUse': {'toolUseId': _utils.guard_tool_call_id(t=t), 'name': t.tool_name, 'input': t.args_as_dict()}
         }
 
+    @staticmethod
+    def _remove_inference_geo_prefix(model_name: BedrockModelName) -> BedrockModelName:
+        """Remove inference geographic prefix from model ID if present."""
+        for prefix in _AWS_BEDROCK_INFERENCE_GEO_PREFIXES:
+            if model_name.startswith(prefix):
+                return model_name.removeprefix(prefix)
+        return model_name
+
 
 @dataclass
 class BedrockStreamedResponse(StreamedResponse):
diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml
@@ -74,7 +74,7 @@ google = ["google-genai>=1.46.0"]
 anthropic = ["anthropic>=0.70.0"]
 groq = ["groq>=0.25.0"]
 mistral = ["mistralai>=1.9.10"]
-bedrock = ["boto3>=1.39.0"]
+bedrock = ["boto3>=1.40.14"]
 huggingface = ["huggingface-hub[inference]>=0.33.5"]
 outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
 outlines-llamacpp = ["outlines[llamacpp]>=1.0.0, <1.3.0"]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -424,6 +424,7 @@ def bedrock_provider():
             region_name=os.getenv('AWS_REGION', 'us-east-1'),
             aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID', 'AKIA6666666666666666'),
             aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY', '6666666666666666666666666666666666666666'),
+            aws_session_token=os.getenv('AWS_SESSION_TOKEN', None),
         )
         yield BedrockProvider(bedrock_client=bedrock_client)
         bedrock_client.close()
diff --git a/tests/models/cassettes/test_bedrock/test_bedrock_model_usage_limit_exceeded.yaml b/tests/models/cassettes/test_bedrock/test_bedrock_model_usage_limit_exceeded.yaml
@@ -0,0 +1,32 @@
+interactions:
+- request:
+    body: '{"input": {"converse": {"messages": [{"role": "user", "content": [{"text": "The quick brown fox jumps over the
+      lazydog."}]}], "system": []}}}'
+    headers:
+      amz-sdk-invocation-id:
+      - !!binary |
+        ZDYxNmVkOTktYzgwMi00MDE0LTljZGUtYWFjMjk5N2I2MDFj
+      amz-sdk-request:
+      - !!binary |
+        YXR0ZW1wdD0x
+      content-length:
+      - '141'
+      content-type:
+      - !!binary |
+        YXBwbGljYXRpb24vanNvbg==
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-sonnet-4-20250514-v1%3A0/count-tokens
+  response:
+    headers:
+      connection:
+      - keep-alive
+      content-length:
+      - '18'
+      content-type:
+      - application/json
+    parsed_body:
+      inputTokens: 19
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_bedrock/test_bedrock_model_usage_limit_not_exceeded.yaml b/tests/models/cassettes/test_bedrock/test_bedrock_model_usage_limit_not_exceeded.yaml
@@ -0,0 +1,78 @@
+interactions:
+- request:
+    body: '{"input": {"converse": {"messages": [{"role": "user", "content": [{"text": "The quick brown fox jumps over the
+      lazydog."}]}], "system": []}}}'
+    headers:
+      amz-sdk-invocation-id:
+      - !!binary |
+        OWQ3NzFhZmItYTkwYi00N2E4LWFkNjMtZmI5OTJhZDEyN2E4
+      amz-sdk-request:
+      - !!binary |
+        YXR0ZW1wdD0x
+      content-length:
+      - '141'
+      content-type:
+      - !!binary |
+        YXBwbGljYXRpb24vanNvbg==
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-sonnet-4-20250514-v1%3A0/count-tokens
+  response:
+    headers:
+      connection:
+      - keep-alive
+      content-length:
+      - '18'
+      content-type:
+      - application/json
+    parsed_body:
+      inputTokens: 19
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "user", "content": [{"text": "The quick brown fox jumps over the lazydog."}]}], "system":
+      [], "inferenceConfig": {}}'
+    headers:
+      amz-sdk-invocation-id:
+      - !!binary |
+        MWMwNDdlYWEtOWIxMy00YjAyLWI3ZjMtMjZkNjQ2MDEzOTY2
+      amz-sdk-request:
+      - !!binary |
+        YXR0ZW1wdD0x
+      content-length:
+      - '139'
+      content-type:
+      - !!binary |
+        YXBwbGljYXRpb24vanNvbg==
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-sonnet-4-20250514-v1%3A0/converse
+  response:
+    headers:
+      connection:
+      - keep-alive
+      content-length:
+      - '785'
+      content-type:
+      - application/json
+    parsed_body:
+      metrics:
+        latencyMs: 2333
+      output:
+        message:
+          content:
+          - text: "I notice there's a small typo in your message - it should be \"lazy dog\" (two words) rather than \"lazydog.\"\n\nThe corrected version is: \"The quick brown fox jumps over the lazy dog.\"\n\nThis is a famous pangram - a sentence that contains every letter of the English alphabet at least once. It's commonly used for testing typewriters, keyboards, fonts, and other applications where you want to display all the letters.\n\nIs there something specific you'd like to know about this phrase, or were you perhaps testing something?"
+          role: assistant
+      stopReason: end_turn
+      usage:
+        cacheReadInputTokenCount: 0
+        cacheReadInputTokens: 0
+        cacheWriteInputTokenCount: 0
+        cacheWriteInputTokens: 0
+        inputTokens: 19
+        outputTokens: 108
+        serverToolUsage: {}
+        totalTokens: 127
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/test_bedrock.py b/tests/models/test_bedrock.py
@@ -32,12 +32,12 @@
     VideoUrl,
 )
 from pydantic_ai.agent import Agent
-from pydantic_ai.exceptions import ModelRetry
+from pydantic_ai.exceptions import ModelRetry, UsageLimitExceeded
 from pydantic_ai.messages import AgentStreamEvent
 from pydantic_ai.models import ModelRequestParameters
 from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
 from pydantic_ai.tools import ToolDefinition
-from pydantic_ai.usage import RequestUsage, RunUsage
+from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits
 
 from ..conftest import IsDatetime, IsInstance, IsStr, try_import
 
@@ -95,6 +95,58 @@ async def test_bedrock_model(allow_model_requests: None, bedrock_provider: Bedro
     )
 
 
+async def test_bedrock_model_usage_limit_exceeded(
+    allow_model_requests: None,
+    bedrock_provider: BedrockProvider,
+):
+    model = BedrockConverseModel('us.anthropic.claude-sonnet-4-20250514-v1:0', provider=bedrock_provider)
+    agent = Agent(model=model)
+
+    with pytest.raises(
+        UsageLimitExceeded,
+        match='The next request would exceed the input_tokens_limit of 18 \\(input_tokens=19\\)',
+    ):
+        await agent.run(
+            'The quick brown fox jumps over the lazydog.',
+            usage_limits=UsageLimits(input_tokens_limit=18, count_tokens_before_request=True),
+        )
+
+
+async def test_bedrock_model_usage_limit_not_exceeded(
+    allow_model_requests: None,
+    bedrock_provider: BedrockProvider,
+):
+    model = BedrockConverseModel('us.anthropic.claude-sonnet-4-20250514-v1:0', provider=bedrock_provider)
+    agent = Agent(model=model)
+
+    result = await agent.run(
+        'The quick brown fox jumps over the lazydog.',
+        usage_limits=UsageLimits(input_tokens_limit=25, count_tokens_before_request=True),
+    )
+
+    assert result.output == snapshot(
+        'I notice there\'s a small typo in your message - it should be "lazy dog" (two words) rather than '
+        '"lazydog."\n\nThe corrected version is: "The quick brown fox jumps over the lazy dog."\n\n'
+        'This is a famous pangram - a sentence that contains every letter of the English alphabet at least once. '
+        "It's commonly used for testing typewriters, keyboards, fonts, and other applications where you want to "
+        "display all the letters.\n\nIs there something specific you'd like to know about this phrase, or were you "
+        'perhaps testing something?'
+    )
+
+
+@pytest.mark.parametrize(
+    ('model_name', 'expected'),
+    [
+        ('us.anthropic.claude-sonnet-4-20250514-v1:0', 'anthropic.claude-sonnet-4-20250514-v1:0'),
+        ('eu.amazon.nova-micro-v1:0', 'amazon.nova-micro-v1:0'),
+        ('apac.meta.llama3-8b-instruct-v1:0', 'meta.llama3-8b-instruct-v1:0'),
+        ('anthropic.claude-sonnet-4-20250514-v1:0', 'anthropic.claude-sonnet-4-20250514-v1:0'),
+    ],
+)
+def test_remove_inference_geo_prefix(model_name: str, expected: str):
+    assert BedrockConverseModel._remove_inference_geo_prefix(model_name) == expected  # pyright: ignore[reportPrivateUsage]
+
+
 async def test_bedrock_model_structured_output(allow_model_requests: None, bedrock_provider: BedrockProvider):
     model = BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider)
     agent = Agent(model=model, system_prompt='You are a helpful chatbot.', retries=5)
@@ -542,11 +594,13 @@ async def test_text_document_url_input(allow_model_requests: None, bedrock_provi
     text_document_url = DocumentUrl(url='https://example-files.online-convert.com/document/txt/example.txt')
 
     result = await agent.run(['What is the main content on this document?', text_document_url])
-    assert result.output == snapshot("""\
-Based on the text in the <document_content> tag, the main content of this document appears to be:
+    assert result.output == snapshot(
+        """\
+        Based on the text in the <document_content> tag, the main content of this document appears to be:
 
-An example text describing the use of "John Doe" as a placeholder name in legal cases, hospitals, and other contexts where a party's real identity is unknown or needs to be withheld. It provides background on how "John Doe" and "Jane Doe" are commonly used in the United States and Canada for this purpose, in contrast to other English speaking countries that use names like "Joe Bloggs". The text gives examples of using John/Jane Doe for legal cases, unidentified corpses, and as generic names on forms. It also mentions how "Baby Doe" and "Precious Doe" are used for unidentified children.\
-""")
+        An example text describing the use of "John Doe" as a placeholder name in legal cases, hospitals, and other contexts where a party's real identity is unknown or needs to be withheld. It provides background on how "John Doe" and "Jane Doe" are commonly used in the United States and Canada for this purpose, in contrast to other English speaking countries that use names like "Joe Bloggs". The text gives examples of using John/Jane Doe for legal cases, unidentified corpses, and as generic names on forms. It also mentions how "Baby Doe" and "Precious Doe" are used for unidentified children.\
+        """
+    )
 
 
 @pytest.mark.vcr()
@@ -557,16 +611,18 @@ async def test_text_as_binary_content_input(allow_model_requests: None, bedrock_
     text_content = BinaryContent(data=b'This is a test document.', media_type='text/plain')
 
     result = await agent.run(['What is the main content on this document?', text_content])
-    assert result.output == snapshot("""\
-The document you're referring to appears to be a test document, which means its primary purpose is likely to serve as an example or a placeholder rather than containing substantive content. Test documents are commonly used for various purposes such as:
+    assert result.output == snapshot(
+        """\
+        The document you're referring to appears to be a test document, which means its primary purpose is likely to serve as an example or a placeholder rather than containing substantive content. Test documents are commonly used for various purposes such as:
 
-1. **Software Testing**: To verify that a system can correctly handle, display, or process documents.
-2. **Design Mockups**: To illustrate how a document might look in a particular format or style.
-3. **Training Materials**: To provide examples for instructional purposes.
-4. **Placeholders**: To fill space in a system or application where real content will eventually be placed.
+        1. **Software Testing**: To verify that a system can correctly handle, display, or process documents.
+        2. **Design Mockups**: To illustrate how a document might look in a particular format or style.
+        3. **Training Materials**: To provide examples for instructional purposes.
+        4. **Placeholders**: To fill space in a system or application where real content will eventually be placed.
 
-Since this is a test document, it probably doesn't contain any meaningful or specific information beyond what is necessary to serve its testing purpose. If you have specific questions about the format, structure, or any particular element within the document, feel free to ask!\
-""")
+        Since this is a test document, it probably doesn't contain any meaningful or specific information beyond what is necessary to serve its testing purpose. If you have specific questions about the format, structure, or any particular element within the document, feel free to ask!\
+        """
+    )
 
 
 @pytest.mark.vcr()
@@ -1091,11 +1147,13 @@ async def get_user_country() -> str:
         return 'Mexico'
 
     result = await agent.run('What is the largest city in the user country?')
-    assert result.output == snapshot("""\
-Based on your location in Mexico, the largest city is Mexico City (Ciudad de México). It's not only the capital but also the most populous city in Mexico with a metropolitan area population of over 21 million people, making it one of the largest urban agglomerations in the world.
+    assert result.output == snapshot(
+        """\
+        Based on your location in Mexico, the largest city is Mexico City (Ciudad de México). It's not only the capital but also the most populous city in Mexico with a metropolitan area population of over 21 million people, making it one of the largest urban agglomerations in the world.
 
-Mexico City is an important cultural, financial, and political center for the country and has a rich history dating back to the Aztec empire when it was known as Tenochtitlán.\
-""")
+        Mexico City is an important cultural, financial, and political center for the country and has a rich history dating back to the Aztec empire when it was known as Tenochtitlán.\
+        """
+    )
 
 
 async def test_bedrock_group_consecutive_tool_return_parts(bedrock_provider: BedrockProvider):
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -424,6 +424,7 @@ def bedrock_provider():`
`424`	`424`	`region_name=os.getenv('AWS_REGION', 'us-east-1'),`
`425`	`425`	`aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID', 'AKIA6666666666666666'),`
`426`	`426`	`aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY', '6666666666666666666666666666666666666666'),`
	`427`	`+ aws_session_token=os.getenv('AWS_SESSION_TOKEN', None),`
`427`	`428`	`)`
`428`	`429`	`yield BedrockProvider(bedrock_client=bedrock_client)`
`429`	`430`	`bedrock_client.close()`