Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion dspy/clients/lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,13 @@ def __init__(
model_family = model.split("/")[-1].lower() if "/" in model else model.lower()

# Recognize OpenAI reasoning models (o1, o3, o4, gpt-5 family)
model_pattern = re.match(r"^(?:o[1345]|gpt-5)(?:-(?:mini|nano))?", model_family)
    # Exclude non-reasoning variants like gpt-5-chat (served via Azure AI Foundry)
# Allow date suffixes like -2023-01-01 after model name or mini/nano/pro
# For gpt-5, use negative lookahead to exclude -chat and allow other suffixes
model_pattern = re.match(
r"^(?:o[1345](?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?|gpt-5(?!-chat)(?:-.*)?)$",
model_family,
)

if model_pattern:
if (temperature and temperature != 1.0) or (max_tokens and max_tokens < 16000):
Expand Down
18 changes: 17 additions & 1 deletion tests/clients/test_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ def test_reasoning_model_token_parameter():
("openai/gpt-5", True),
("openai/gpt-5-mini", True),
("openai/gpt-5-nano", True),
("azure/gpt-5-chat", False), # gpt-5-chat is NOT a reasoning model
("openai/gpt-4", False),
("anthropic/claude-2", False),
]
Expand All @@ -318,7 +319,7 @@ def test_reasoning_model_token_parameter():
assert lm.kwargs["max_tokens"] == 1000


@pytest.mark.parametrize("model_name", ["openai/o1", "openai/gpt-5-nano"])
@pytest.mark.parametrize("model_name", ["openai/o1", "openai/gpt-5-nano", "openai/gpt-5-mini"])
def test_reasoning_model_requirements(model_name):
# Should raise assertion error if temperature or max_tokens requirements not met
with pytest.raises(
Expand Down Expand Up @@ -347,6 +348,21 @@ def test_reasoning_model_requirements(model_name):
assert lm.kwargs["max_completion_tokens"] is None


def test_gpt_5_chat_not_reasoning_model():
    """gpt-5-chat must be handled as a regular (non-reasoning) model.

    Unlike the o-series / gpt-5 reasoning family, constructing an LM for
    gpt-5-chat should accept arbitrary temperature and max_tokens values,
    and the client should keep ``max_tokens`` rather than translating it
    into ``max_completion_tokens``.
    """
    # Construction must not raise the reasoning-model validation error.
    chat_lm = dspy.LM(
        model="openai/gpt-5-chat",
        temperature=0.7,  # any value is allowed for a non-reasoning model
        max_tokens=1000,  # any value is allowed for a non-reasoning model
    )
    # Non-reasoning path: max_tokens passes through untranslated.
    assert "max_completion_tokens" not in chat_lm.kwargs
    assert chat_lm.kwargs.get("max_tokens") == 1000
    assert chat_lm.kwargs["temperature"] == 0.7


def test_dump_state():
lm = dspy.LM(
model="openai/gpt-4o-mini",
Expand Down