Skip to content

Commit 874be88

Browse files
feat: add Bedrock count_tokens support plus usage-limit coverage
- Wire BedrockConverseModel.count_tokens to the Bedrock Runtime count_tokens API and reuse the converse payload builder for both count and inference calls.
- Update pytest cassettes + dependency floor so Bedrock token preflight can run with real responses, and add a CLI helper for capturing new recordings.
- Add usage-limit tests (with fresh VCR data) and a small unit test for _remove_inference_geo_prefix to keep the behavior covered once the new count flow is exercised.
1 parent 5768447 commit 874be88

File tree

8 files changed

+258
-37
lines changed

8 files changed

+258
-37
lines changed

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,13 @@
103103
'bedrock:us.anthropic.claude-opus-4-20250514-v1:0',
104104
'bedrock:anthropic.claude-sonnet-4-20250514-v1:0',
105105
'bedrock:us.anthropic.claude-sonnet-4-20250514-v1:0',
106+
'bedrock:eu.anthropic.claude-sonnet-4-20250514-v1:0',
107+
'bedrock:anthropic.claude-sonnet-4-5-20250929-v1:0',
108+
'bedrock:us.anthropic.claude-sonnet-4-5-20250929-v1:0',
109+
'bedrock:eu.anthropic.claude-sonnet-4-5-20250929-v1:0',
110+
'bedrock:anthropic.claude-haiku-4-5-20251001-v1:0',
111+
'bedrock:us.anthropic.claude-haiku-4-5-20251001-v1:0',
112+
'bedrock:eu.anthropic.claude-haiku-4-5-20251001-v1:0',
106113
'bedrock:cohere.command-text-v14',
107114
'bedrock:cohere.command-r-v1:0',
108115
'bedrock:cohere.command-r-plus-v1:0',

pydantic_ai_slim/pydantic_ai/models/bedrock.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
ConverseStreamMetadataEventTypeDef,
6060
ConverseStreamOutputTypeDef,
6161
ConverseStreamResponseTypeDef,
62+
CountTokensRequestTypeDef,
6263
DocumentBlockTypeDef,
6364
GuardrailConfigurationTypeDef,
6465
ImageBlockTypeDef,
@@ -75,7 +76,6 @@
7576
VideoBlockTypeDef,
7677
)
7778

78-
7979
LatestBedrockModelNames = Literal[
8080
'amazon.titan-tg1-large',
8181
'amazon.titan-text-lite-v1',
@@ -104,6 +104,13 @@
104104
'us.anthropic.claude-opus-4-20250514-v1:0',
105105
'anthropic.claude-sonnet-4-20250514-v1:0',
106106
'us.anthropic.claude-sonnet-4-20250514-v1:0',
107+
'eu.anthropic.claude-sonnet-4-20250514-v1:0',
108+
'anthropic.claude-sonnet-4-5-20250929-v1:0',
109+
'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
110+
'eu.anthropic.claude-sonnet-4-5-20250929-v1:0',
111+
'anthropic.claude-haiku-4-5-20251001-v1:0',
112+
'us.anthropic.claude-haiku-4-5-20251001-v1:0',
113+
'eu.anthropic.claude-haiku-4-5-20251001-v1:0',
107114
'cohere.command-text-v14',
108115
'cohere.command-r-v1:0',
109116
'cohere.command-r-plus-v1:0',
@@ -134,7 +141,6 @@
134141
See [the Bedrock docs](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for a full list.
135142
"""
136143

137-
138144
P = ParamSpec('P')
139145
T = typing.TypeVar('T')
140146

@@ -147,6 +153,13 @@
147153
'tool_use': 'tool_call',
148154
}
149155

156+
_AWS_BEDROCK_INFERENCE_GEO_PREFIXES: tuple[str, ...] = ('us.', 'eu.', 'apac.', 'jp.', 'au.', 'ca.')
157+
"""Geo prefixes for Bedrock inference profile IDs (e.g., 'eu.', 'us.').
158+
159+
Used to strip the geo prefix so we can pass a pure foundation model ID/ARN to CountTokens,
160+
which does not accept profile IDs. Extend if new geos appear (e.g., 'global.', 'us-gov.').
161+
"""
162+
150163

151164
class BedrockModelSettings(ModelSettings, total=False):
152165
"""Settings for Bedrock models.
@@ -273,6 +286,30 @@ async def request(
273286
model_response = await self._process_response(response)
274287
return model_response
275288

289+
async def count_tokens(
    self,
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
    model_request_parameters: ModelRequestParameters,
) -> usage.RequestUsage:
    """Ask Bedrock how many input tokens the given messages amount to.

    Token counting is only available for a limited set of models; the supported ones are listed on
    <https://docs.aws.amazon.com/bedrock/latest/userguide/count-tokens.html>.

    Args:
        messages: The message history to be counted.
        model_settings: Optional model settings, passed through `prepare_request`.
        model_request_parameters: Request parameters, passed through `prepare_request` and then
            used when mapping `messages` to the Bedrock format.

    Returns:
        A `RequestUsage` whose `input_tokens` is the `inputTokens` value of the response.
    """
    model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)
    system_blocks, converse_messages = await self._map_messages(messages, model_request_parameters)

    # The model ID is sent with any inference geo prefix stripped.
    request: CountTokensRequestTypeDef = {
        'modelId': self._remove_inference_geo_prefix(self.model_name),
        'input': {'converse': {'messages': converse_messages, 'system': system_blocks}},
    }

    # The client call is handed to a worker thread through anyio and awaited from there.
    call = functools.partial(self.client.count_tokens, **request)
    result = await anyio.to_thread.run_sync(call)
    return usage.RequestUsage(input_tokens=result['inputTokens'])
312+
276313
@asynccontextmanager
277314
async def request_stream(
278315
self,
@@ -634,6 +671,14 @@ def _map_tool_call(t: ToolCallPart) -> ContentBlockOutputTypeDef:
634671
'toolUse': {'toolUseId': _utils.guard_tool_call_id(t=t), 'name': t.tool_name, 'input': t.args_as_dict()}
635672
}
636673

674+
@staticmethod
def _remove_inference_geo_prefix(model_name: BedrockModelName) -> BedrockModelName:
    """Return `model_name` without its leading inference geo prefix.

    The first entry of `_AWS_BEDROCK_INFERENCE_GEO_PREFIXES` that `model_name` starts with is cut
    off the front; when no entry matches, `model_name` is returned unchanged.
    """
    matched = next(
        (geo for geo in _AWS_BEDROCK_INFERENCE_GEO_PREFIXES if model_name.startswith(geo)),
        None,
    )
    if matched is None:
        return model_name
    return model_name[len(matched) :]
681+
637682

638683
@dataclass
639684
class BedrockStreamedResponse(StreamedResponse):

pydantic_ai_slim/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ google = ["google-genai>=1.46.0"]
7474
anthropic = ["anthropic>=0.70.0"]
7575
groq = ["groq>=0.25.0"]
7676
mistral = ["mistralai>=1.9.10"]
77-
bedrock = ["boto3>=1.39.0"]
77+
bedrock = ["boto3>=1.40.14"]
7878
huggingface = ["huggingface-hub[inference]>=0.33.5"]
7979
outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
8080
outlines-llamacpp = ["outlines[llamacpp]>=1.0.0, <1.3.0"]

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,7 @@ def bedrock_provider():
424424
region_name=os.getenv('AWS_REGION', 'us-east-1'),
425425
aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID', 'AKIA6666666666666666'),
426426
aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY', '6666666666666666666666666666666666666666'),
427+
aws_session_token=os.getenv('AWS_SESSION_TOKEN', None),
427428
)
428429
yield BedrockProvider(bedrock_client=bedrock_client)
429430
bedrock_client.close()
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
interactions:
2+
- request:
3+
body: '{"input": {"converse": {"messages": [{"role": "user", "content": [{"text": "The quick brown fox jumps over the
4+
lazydog."}]}], "system": []}}}'
5+
headers:
6+
amz-sdk-invocation-id:
7+
- !!binary |
8+
ZDYxNmVkOTktYzgwMi00MDE0LTljZGUtYWFjMjk5N2I2MDFj
9+
amz-sdk-request:
10+
- !!binary |
11+
YXR0ZW1wdD0x
12+
content-length:
13+
- '141'
14+
content-type:
15+
- !!binary |
16+
YXBwbGljYXRpb24vanNvbg==
17+
method: POST
18+
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-sonnet-4-20250514-v1%3A0/count-tokens
19+
response:
20+
headers:
21+
connection:
22+
- keep-alive
23+
content-length:
24+
- '18'
25+
content-type:
26+
- application/json
27+
parsed_body:
28+
inputTokens: 19
29+
status:
30+
code: 200
31+
message: OK
32+
version: 1
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
interactions:
2+
- request:
3+
body: '{"input": {"converse": {"messages": [{"role": "user", "content": [{"text": "The quick brown fox jumps over the
4+
lazydog."}]}], "system": []}}}'
5+
headers:
6+
amz-sdk-invocation-id:
7+
- !!binary |
8+
OWQ3NzFhZmItYTkwYi00N2E4LWFkNjMtZmI5OTJhZDEyN2E4
9+
amz-sdk-request:
10+
- !!binary |
11+
YXR0ZW1wdD0x
12+
content-length:
13+
- '141'
14+
content-type:
15+
- !!binary |
16+
YXBwbGljYXRpb24vanNvbg==
17+
method: POST
18+
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-sonnet-4-20250514-v1%3A0/count-tokens
19+
response:
20+
headers:
21+
connection:
22+
- keep-alive
23+
content-length:
24+
- '18'
25+
content-type:
26+
- application/json
27+
parsed_body:
28+
inputTokens: 19
29+
status:
30+
code: 200
31+
message: OK
32+
- request:
33+
body: '{"messages": [{"role": "user", "content": [{"text": "The quick brown fox jumps over the lazydog."}]}], "system":
34+
[], "inferenceConfig": {}}'
35+
headers:
36+
amz-sdk-invocation-id:
37+
- !!binary |
38+
MWMwNDdlYWEtOWIxMy00YjAyLWI3ZjMtMjZkNjQ2MDEzOTY2
39+
amz-sdk-request:
40+
- !!binary |
41+
YXR0ZW1wdD0x
42+
content-length:
43+
- '139'
44+
content-type:
45+
- !!binary |
46+
YXBwbGljYXRpb24vanNvbg==
47+
method: POST
48+
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-sonnet-4-20250514-v1%3A0/converse
49+
response:
50+
headers:
51+
connection:
52+
- keep-alive
53+
content-length:
54+
- '785'
55+
content-type:
56+
- application/json
57+
parsed_body:
58+
metrics:
59+
latencyMs: 2333
60+
output:
61+
message:
62+
content:
63+
- text: "I notice there's a small typo in your message - it should be \"lazy dog\" (two words) rather than \"lazydog.\"\n\nThe corrected version is: \"The quick brown fox jumps over the lazy dog.\"\n\nThis is a famous pangram - a sentence that contains every letter of the English alphabet at least once. It's commonly used for testing typewriters, keyboards, fonts, and other applications where you want to display all the letters.\n\nIs there something specific you'd like to know about this phrase, or were you perhaps testing something?"
64+
role: assistant
65+
stopReason: end_turn
66+
usage:
67+
cacheReadInputTokenCount: 0
68+
cacheReadInputTokens: 0
69+
cacheWriteInputTokenCount: 0
70+
cacheWriteInputTokens: 0
71+
inputTokens: 19
72+
outputTokens: 108
73+
serverToolUsage: {}
74+
totalTokens: 127
75+
status:
76+
code: 200
77+
message: OK
78+
version: 1

tests/models/test_bedrock.py

Lines changed: 76 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@
3232
VideoUrl,
3333
)
3434
from pydantic_ai.agent import Agent
35-
from pydantic_ai.exceptions import ModelRetry
35+
from pydantic_ai.exceptions import ModelRetry, UsageLimitExceeded
3636
from pydantic_ai.messages import AgentStreamEvent
3737
from pydantic_ai.models import ModelRequestParameters
3838
from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
3939
from pydantic_ai.tools import ToolDefinition
40-
from pydantic_ai.usage import RequestUsage, RunUsage
40+
from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits
4141

4242
from ..conftest import IsDatetime, IsInstance, IsStr, try_import
4343

@@ -95,6 +95,58 @@ async def test_bedrock_model(allow_model_requests: None, bedrock_provider: Bedro
9595
)
9696

9797

98+
async def test_bedrock_model_usage_limit_exceeded(
    allow_model_requests: None,
    bedrock_provider: BedrockProvider,
):
    """Running with `input_tokens_limit=18` and pre-request counting raises `UsageLimitExceeded`."""
    bedrock_model = BedrockConverseModel('us.anthropic.claude-sonnet-4-20250514-v1:0', provider=bedrock_provider)
    agent = Agent(model=bedrock_model)
    limits = UsageLimits(input_tokens_limit=18, count_tokens_before_request=True)
    expected_message = 'The next request would exceed the input_tokens_limit of 18 \\(input_tokens=19\\)'

    with pytest.raises(UsageLimitExceeded, match=expected_message):
        await agent.run('The quick brown fox jumps over the lazydog.', usage_limits=limits)
113+
114+
115+
async def test_bedrock_model_usage_limit_not_exceeded(
    allow_model_requests: None,
    bedrock_provider: BedrockProvider,
):
    """Running with `input_tokens_limit=25` and pre-request counting completes and returns the model output."""
    bedrock_model = BedrockConverseModel('us.anthropic.claude-sonnet-4-20250514-v1:0', provider=bedrock_provider)
    limits = UsageLimits(input_tokens_limit=25, count_tokens_before_request=True)

    run_result = await Agent(model=bedrock_model).run(
        'The quick brown fox jumps over the lazydog.',
        usage_limits=limits,
    )

    assert run_result.output == snapshot(
        'I notice there\'s a small typo in your message - it should be "lazy dog" (two words) rather than '
        '"lazydog."\n\nThe corrected version is: "The quick brown fox jumps over the lazy dog."\n\n'
        'This is a famous pangram - a sentence that contains every letter of the English alphabet at least once. '
        "It's commonly used for testing typewriters, keyboards, fonts, and other applications where you want to "
        "display all the letters.\n\nIs there something specific you'd like to know about this phrase, or were you "
        'perhaps testing something?'
    )
135+
136+
137+
@pytest.mark.parametrize(
    ('model_name', 'expected'),
    [
        ('us.anthropic.claude-sonnet-4-20250514-v1:0', 'anthropic.claude-sonnet-4-20250514-v1:0'),
        ('eu.amazon.nova-micro-v1:0', 'amazon.nova-micro-v1:0'),
        ('apac.meta.llama3-8b-instruct-v1:0', 'meta.llama3-8b-instruct-v1:0'),
        ('anthropic.claude-sonnet-4-20250514-v1:0', 'anthropic.claude-sonnet-4-20250514-v1:0'),
    ],
)
def test_remove_inference_geo_prefix(model_name: str, expected: str):
    """Geo-prefixed model IDs lose their prefix; an unprefixed ID comes back unchanged."""
    stripped = BedrockConverseModel._remove_inference_geo_prefix(model_name)  # pyright: ignore[reportPrivateUsage]
    assert stripped == expected
148+
149+
98150
async def test_bedrock_model_structured_output(allow_model_requests: None, bedrock_provider: BedrockProvider):
99151
model = BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider)
100152
agent = Agent(model=model, system_prompt='You are a helpful chatbot.', retries=5)
@@ -542,11 +594,13 @@ async def test_text_document_url_input(allow_model_requests: None, bedrock_provi
542594
text_document_url = DocumentUrl(url='https://example-files.online-convert.com/document/txt/example.txt')
543595

544596
result = await agent.run(['What is the main content on this document?', text_document_url])
545-
assert result.output == snapshot("""\
546-
Based on the text in the <document_content> tag, the main content of this document appears to be:
597+
assert result.output == snapshot(
598+
"""\
599+
Based on the text in the <document_content> tag, the main content of this document appears to be:
547600
548-
An example text describing the use of "John Doe" as a placeholder name in legal cases, hospitals, and other contexts where a party's real identity is unknown or needs to be withheld. It provides background on how "John Doe" and "Jane Doe" are commonly used in the United States and Canada for this purpose, in contrast to other English speaking countries that use names like "Joe Bloggs". The text gives examples of using John/Jane Doe for legal cases, unidentified corpses, and as generic names on forms. It also mentions how "Baby Doe" and "Precious Doe" are used for unidentified children.\
549-
""")
601+
An example text describing the use of "John Doe" as a placeholder name in legal cases, hospitals, and other contexts where a party's real identity is unknown or needs to be withheld. It provides background on how "John Doe" and "Jane Doe" are commonly used in the United States and Canada for this purpose, in contrast to other English speaking countries that use names like "Joe Bloggs". The text gives examples of using John/Jane Doe for legal cases, unidentified corpses, and as generic names on forms. It also mentions how "Baby Doe" and "Precious Doe" are used for unidentified children.\
602+
"""
603+
)
550604

551605

552606
@pytest.mark.vcr()
@@ -557,16 +611,18 @@ async def test_text_as_binary_content_input(allow_model_requests: None, bedrock_
557611
text_content = BinaryContent(data=b'This is a test document.', media_type='text/plain')
558612

559613
result = await agent.run(['What is the main content on this document?', text_content])
560-
assert result.output == snapshot("""\
561-
The document you're referring to appears to be a test document, which means its primary purpose is likely to serve as an example or a placeholder rather than containing substantive content. Test documents are commonly used for various purposes such as:
614+
assert result.output == snapshot(
615+
"""\
616+
The document you're referring to appears to be a test document, which means its primary purpose is likely to serve as an example or a placeholder rather than containing substantive content. Test documents are commonly used for various purposes such as:
562617
563-
1. **Software Testing**: To verify that a system can correctly handle, display, or process documents.
564-
2. **Design Mockups**: To illustrate how a document might look in a particular format or style.
565-
3. **Training Materials**: To provide examples for instructional purposes.
566-
4. **Placeholders**: To fill space in a system or application where real content will eventually be placed.
618+
1. **Software Testing**: To verify that a system can correctly handle, display, or process documents.
619+
2. **Design Mockups**: To illustrate how a document might look in a particular format or style.
620+
3. **Training Materials**: To provide examples for instructional purposes.
621+
4. **Placeholders**: To fill space in a system or application where real content will eventually be placed.
567622
568-
Since this is a test document, it probably doesn't contain any meaningful or specific information beyond what is necessary to serve its testing purpose. If you have specific questions about the format, structure, or any particular element within the document, feel free to ask!\
569-
""")
623+
Since this is a test document, it probably doesn't contain any meaningful or specific information beyond what is necessary to serve its testing purpose. If you have specific questions about the format, structure, or any particular element within the document, feel free to ask!\
624+
"""
625+
)
570626

571627

572628
@pytest.mark.vcr()
@@ -1091,11 +1147,13 @@ async def get_user_country() -> str:
10911147
return 'Mexico'
10921148

10931149
result = await agent.run('What is the largest city in the user country?')
1094-
assert result.output == snapshot("""\
1095-
Based on your location in Mexico, the largest city is Mexico City (Ciudad de México). It's not only the capital but also the most populous city in Mexico with a metropolitan area population of over 21 million people, making it one of the largest urban agglomerations in the world.
1150+
assert result.output == snapshot(
1151+
"""\
1152+
Based on your location in Mexico, the largest city is Mexico City (Ciudad de México). It's not only the capital but also the most populous city in Mexico with a metropolitan area population of over 21 million people, making it one of the largest urban agglomerations in the world.
10961153
1097-
Mexico City is an important cultural, financial, and political center for the country and has a rich history dating back to the Aztec empire when it was known as Tenochtitlán.\
1098-
""")
1154+
Mexico City is an important cultural, financial, and political center for the country and has a rich history dating back to the Aztec empire when it was known as Tenochtitlán.\
1155+
"""
1156+
)
10991157

11001158

11011159
async def test_bedrock_group_consecutive_tool_return_parts(bedrock_provider: BedrockProvider):

0 commit comments

Comments
 (0)