diff --git a/dspy/clients/lm.py b/dspy/clients/lm.py
index 19085bea81..c165ceca4d 100644
--- a/dspy/clients/lm.py
+++ b/dspy/clients/lm.py
@@ -85,7 +85,13 @@ def __init__(
         model_family = model.split("/")[-1].lower() if "/" in model else model.lower()
 
         # Recognize OpenAI reasoning models (o1, o3, o4, gpt-5 family)
-        model_pattern = re.match(r"^(?:o[1345]|gpt-5)(?:-(?:mini|nano))?", model_family)
+        # Exclude non-reasoning variants such as gpt-5-chat (available in Azure AI Foundry).
+        # Allow date suffixes like -2023-01-01 after the model name or after mini/nano/pro.
+        # For gpt-5, a negative lookahead excludes -chat while allowing other suffixes.
+        model_pattern = re.match(
+            r"^(?:o[1345](?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?|gpt-5(?!-chat)(?:-.*)?)$",
+            model_family,
+        )
 
         if model_pattern:
             if (temperature and temperature != 1.0) or (max_tokens and max_tokens < 16000):
diff --git a/tests/clients/test_lm.py b/tests/clients/test_lm.py
index a24c09a9d4..64e76ecd9c 100644
--- a/tests/clients/test_lm.py
+++ b/tests/clients/test_lm.py
@@ -298,6 +298,7 @@ def test_reasoning_model_token_parameter():
         ("openai/gpt-5", True),
         ("openai/gpt-5-mini", True),
         ("openai/gpt-5-nano", True),
+        ("azure/gpt-5-chat", False),  # gpt-5-chat is NOT a reasoning model
         ("openai/gpt-4", False),
         ("anthropic/claude-2", False),
     ]
@@ -318,7 +319,7 @@ def test_reasoning_model_token_parameter():
         assert lm.kwargs["max_tokens"] == 1000
 
 
-@pytest.mark.parametrize("model_name", ["openai/o1", "openai/gpt-5-nano"])
+@pytest.mark.parametrize("model_name", ["openai/o1", "openai/gpt-5-nano", "openai/gpt-5-mini"])
 def test_reasoning_model_requirements(model_name):
     # Should raise assertion error if temperature or max_tokens requirements not met
     with pytest.raises(
@@ -347,6 +348,21 @@ def test_reasoning_model_requirements(model_name):
         assert lm.kwargs["max_completion_tokens"] is None
 
 
+def test_gpt_5_chat_not_reasoning_model():
+    """Test that gpt-5-chat is NOT treated as a reasoning model."""
+    # Should NOT raise a validation error: gpt-5-chat is not a reasoning model.
+    lm = dspy.LM(
+        model="openai/gpt-5-chat",
+        temperature=0.7,  # can be any value
+        max_tokens=1000,  # can be any value
+    )
+    # Should use max_tokens, not max_completion_tokens.
+    assert "max_completion_tokens" not in lm.kwargs
+    assert "max_tokens" in lm.kwargs
+    assert lm.kwargs["max_tokens"] == 1000
+    assert lm.kwargs["temperature"] == 0.7
+
+
 def test_dump_state():
     lm = dspy.LM(
         model="openai/gpt-4o-mini",
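
For reference, a minimal standalone sketch (not part of the patch) that checks the new pattern against representative model families; the cases mapping is illustrative and mirrors the tests above:

import re

# The pattern introduced in dspy/clients/lm.py above.
REASONING_MODEL_PATTERN = re.compile(
    r"^(?:o[1345](?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?|gpt-5(?!-chat)(?:-.*)?)$"
)

# Expected classifications: True means "treated as a reasoning model".
cases = {
    "o1": True,
    "o3-mini": True,
    "o1-mini-2024-09-12": True,  # date suffix after mini/nano/pro is allowed
    "gpt-5": True,
    "gpt-5-mini": True,
    "gpt-5-chat": False,  # rejected by the (?!-chat) negative lookahead
    "gpt-4": False,
}

for model_family, expected in cases.items():
    assert bool(REASONING_MODEL_PATTERN.match(model_family)) == expected, model_family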