1 | 1 | import os
| 2 | +import unittest.mock |
2 | 3 |
3 | 4 | import pydantic |
4 | 5 | import pytest |
5 | 6 |
6 | 7 | import strands |
7 | 8 | from strands import Agent, tool |
8 | 9 | from strands.models.openai import OpenAIModel |
| 10 | +from strands.types.exceptions import ContextWindowOverflowException, ModelThrottledException |
9 | 11 | from tests_integ.models import providers |
10 | 12 |
11 | 13 | # these tests only run if we have the OpenAI API key
@@ -167,3 +169,55 @@ def tool_with_image_return():
167 | 169 |     # 'user', but this message with role 'tool' contains an image URL."
168 | 170 |     # See https://github.com/strands-agents/sdk-python/issues/320 for additional details
169 | 171 |     agent("Run the tool and analyze the image")
| 172 | + |
| 173 | + |
| 174 | +def test_context_window_overflow_integration(): |
| 175 | + """Integration test for context window overflow with OpenAI. |
| 176 | +
| 177 | +    This test verifies that when a request exceeds the model's context window,
| 178 | +    the OpenAI model properly raises a ContextWindowOverflowException.
| 179 | +    """
| 180 | +    # Pin gpt-4o-mini so the request is cheap and the context window (128k tokens) is known
| 181 | +    mini_model = OpenAIModel(
| 182 | +        model_id="gpt-4o-mini-2024-07-18",
| 183 | +        client_args={
| 184 | +            "api_key": os.getenv("OPENAI_API_KEY"),
| 185 | +        },
| 186 | +    )
| 187 | + |
| 188 | +    agent = Agent(model=mini_model)
| 189 | + |
| 190 | +    # Create a prompt long enough to exceed the context window,
| 191 | +    # but short enough to not trip token-per-minute rate limits first
| 192 | +    long_text = (
| 193 | +        "This text is longer than the context window, but short enough to not get caught in the token rate limit. " * 6800
| 194 | +    )
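| | +    # Rough size check (assuming ~4 chars/token): 6800 * ~105 chars ≈ 700k chars ≈ 175k tokens, well over the 128k window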
| 195 | + |
| 196 | +    # The conversation manager will try to reduce context and retry, but a single
| 197 | +    # oversized message cannot be trimmed, so ContextWindowOverflowException propagates
| 198 | +    with pytest.raises(ContextWindowOverflowException):
| 199 | +        agent(long_text)
| 200 | + |
| 201 | + |
| 202 | +def test_rate_limit_throttling_integration_no_retries(model): |
| 203 | + """Integration test for rate limit handling with retries disabled. |
| 204 | +
| 205 | +    This test verifies that when a request exceeds OpenAI's rate limits,
| 206 | +    the model properly raises a ModelThrottledException. We disable retries
| 207 | +    to avoid waiting for the exponential backoff during testing.
| 208 | +    """
| 209 | +    # Patch the event loop constants to disable retries for this test
| 210 | +    with unittest.mock.patch("strands.event_loop.event_loop.MAX_ATTEMPTS", 1):
| 211 | +        agent = Agent(model=model)
| 212 | + |
| 213 | +        # Create a message that's very long to trigger token-per-minute rate limits
| 214 | +        # This should be large enough to exceed TPM limits immediately
| 215 | +        very_long_text = "Really long text " * 20000
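| | +        # Rough size check (assuming ~4 chars/token): 20000 * 17 chars = 340k chars ≈ 85k tokens, above typical TPM tiers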
| 216 | + |
| 217 | +        # This should raise ModelThrottledException without retries
| 218 | +        with pytest.raises(ModelThrottledException) as exc_info:
| 219 | +            agent(very_long_text)
| 220 | + |
| 221 | +        # Verify it's a rate limit error
| 222 | +        error_message = str(exc_info.value).lower()
| 223 | +        assert "rate limit" in error_message or "tokens per min" in error_message