Skip to content

Commit 50f71be

Browse files
committed
Support Claude 3.7 and use its extended thinking in research mode
Claude 3.7 Sonnet is Anthropic's first reasoning model. It provides a single model/API capable of standard and extended thinking. Utilize the extended thinking in Khoj's research mode. Increase default max output tokens to 8K for Anthropic models.
1 parent 69048a8 commit 50f71be

File tree

7 files changed

+68
-18
lines changed

7 files changed

+68
-18
lines changed

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ dependencies = [
8686
"pytz ~= 2024.1",
8787
"cron-descriptor == 1.4.3",
8888
"django_apscheduler == 0.6.2",
89-
"anthropic == 0.26.1",
89+
"anthropic == 0.49.0",
9090
"docx2txt == 0.8",
9191
"google-generativeai == 0.8.3",
9292
"pyjson5 == 1.6.7",

src/khoj/processor/conversation/anthropic/anthropic_chat.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
def extract_questions_anthropic(
3333
text,
34-
model: Optional[str] = "claude-instant-1.2",
34+
model: Optional[str] = "claude-3-7-sonnet-latest",
3535
conversation_log={},
3636
api_key=None,
3737
temperature=0.7,
@@ -122,7 +122,7 @@ def extract_questions_anthropic(
122122
return questions
123123

124124

125-
def anthropic_send_message_to_model(messages, api_key, model, response_type="text", tracer={}):
125+
def anthropic_send_message_to_model(messages, api_key, model, response_type="text", deepthought=False, tracer={}):
126126
"""
127127
Send message to model
128128
"""
@@ -135,6 +135,7 @@ def anthropic_send_message_to_model(messages, api_key, model, response_type="tex
135135
model_name=model,
136136
api_key=api_key,
137137
response_type=response_type,
138+
deepthought=deepthought,
138139
tracer=tracer,
139140
)
140141

@@ -145,7 +146,7 @@ def converse_anthropic(
145146
online_results: Optional[Dict[str, Dict]] = None,
146147
code_results: Optional[Dict[str, Dict]] = None,
147148
conversation_log={},
148-
model: Optional[str] = "claude-3-5-sonnet-20241022",
149+
model: Optional[str] = "claude-3-7-sonnet-latest",
149150
api_key: Optional[str] = None,
150151
completion_func=None,
151152
conversation_commands=[ConversationCommand.Default],
@@ -160,6 +161,7 @@ def converse_anthropic(
160161
generated_files: List[FileAttachment] = None,
161162
program_execution_context: Optional[List[str]] = None,
162163
generated_asset_results: Dict[str, Dict] = {},
164+
deepthought: Optional[bool] = False,
163165
tracer: dict = {},
164166
):
165167
"""
@@ -239,5 +241,6 @@ def converse_anthropic(
239241
system_prompt=system_prompt,
240242
completion_func=completion_func,
241243
max_prompt_size=max_prompt_size,
244+
deepthought=deepthought,
242245
tracer=tracer,
243246
)

src/khoj/processor/conversation/anthropic/utils.py

+48-13
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,8 @@
1717
commit_conversation_trace,
1818
get_image_from_url,
1919
)
20-
from khoj.utils import state
2120
from khoj.utils.helpers import (
2221
get_chat_usage_metrics,
23-
in_debug_mode,
2422
is_none_or_empty,
2523
is_promptrace_enabled,
2624
)
@@ -30,7 +28,8 @@
3028
anthropic_clients: Dict[str, anthropic.Anthropic] = {}
3129

3230

33-
DEFAULT_MAX_TOKENS_ANTHROPIC = 3000
31+
DEFAULT_MAX_TOKENS_ANTHROPIC = 8000
32+
MAX_REASONING_TOKENS_ANTHROPIC = 12000
3433

3534

3635
@retry(
@@ -42,12 +41,13 @@
4241
def anthropic_completion_with_backoff(
4342
messages,
4443
system_prompt,
45-
model_name,
44+
model_name: str,
4645
temperature=0,
4746
api_key=None,
4847
model_kwargs=None,
4948
max_tokens=None,
5049
response_type="text",
50+
deepthought=False,
5151
tracer={},
5252
) -> str:
5353
if api_key not in anthropic_clients:
@@ -57,18 +57,24 @@ def anthropic_completion_with_backoff(
5757
client = anthropic_clients[api_key]
5858

5959
formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
60-
if response_type == "json_object":
61-
# Prefill model response with '{' to make it output a valid JSON object
60+
aggregated_response = ""
61+
if response_type == "json_object" and not deepthought:
62+
# Prefill model response with '{' to make it output a valid JSON object. Not supported with extended thinking.
6263
formatted_messages += [{"role": "assistant", "content": "{"}]
63-
64-
aggregated_response = "{" if response_type == "json_object" else ""
65-
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
64+
aggregated_response += "{"
6665

6766
final_message = None
6867
model_kwargs = model_kwargs or dict()
6968
if system_prompt:
7069
model_kwargs["system"] = system_prompt
7170

71+
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
72+
if deepthought and model_name.startswith("claude-3-7"):
73+
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
74+
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
75+
# Temperature control not supported when using extended thinking
76+
temperature = 1.0
77+
7278
with client.messages.stream(
7379
messages=formatted_messages,
7480
model=model_name, # type: ignore
@@ -111,20 +117,41 @@ def anthropic_chat_completion_with_backoff(
111117
system_prompt,
112118
max_prompt_size=None,
113119
completion_func=None,
120+
deepthought=False,
114121
model_kwargs=None,
115122
tracer={},
116123
):
117124
g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
118125
t = Thread(
119126
target=anthropic_llm_thread,
120-
args=(g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size, model_kwargs, tracer),
127+
args=(
128+
g,
129+
messages,
130+
system_prompt,
131+
model_name,
132+
temperature,
133+
api_key,
134+
max_prompt_size,
135+
deepthought,
136+
model_kwargs,
137+
tracer,
138+
),
121139
)
122140
t.start()
123141
return g
124142

125143

126144
def anthropic_llm_thread(
127-
g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size=None, model_kwargs=None, tracer={}
145+
g,
146+
messages,
147+
system_prompt,
148+
model_name,
149+
temperature,
150+
api_key,
151+
max_prompt_size=None,
152+
deepthought=False,
153+
model_kwargs=None,
154+
tracer={},
128155
):
129156
try:
130157
if api_key not in anthropic_clients:
@@ -133,6 +160,14 @@ def anthropic_llm_thread(
133160
else:
134161
client: anthropic.Anthropic = anthropic_clients[api_key]
135162

163+
model_kwargs = model_kwargs or dict()
164+
max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC
165+
if deepthought and model_name.startswith("claude-3-7"):
166+
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
167+
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
168+
# Temperature control not supported when using extended thinking
169+
temperature = 1.0
170+
136171
formatted_messages: List[anthropic.types.MessageParam] = [
137172
anthropic.types.MessageParam(role=message.role, content=message.content) for message in messages
138173
]
@@ -145,8 +180,8 @@ def anthropic_llm_thread(
145180
temperature=temperature,
146181
system=system_prompt,
147182
timeout=20,
148-
max_tokens=DEFAULT_MAX_TOKENS_ANTHROPIC,
149-
**(model_kwargs or dict()),
183+
max_tokens=max_tokens,
184+
**model_kwargs,
150185
) as stream:
151186
for text in stream.text_stream:
152187
aggregated_response += text

src/khoj/processor/conversation/utils.py

+3
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@
6161
"gemini-1.5-pro": 60000,
6262
# Anthropic Models
6363
"claude-3-5-sonnet-20241022": 60000,
64+
"claude-3-5-sonnet-latest": 60000,
65+
"claude-3-7-sonnet-20250219": 60000,
66+
"claude-3-7-sonnet-latest": 60000,
6467
"claude-3-5-haiku-20241022": 60000,
6568
# Offline Models
6669
"bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,

src/khoj/routers/helpers.py

+5
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,7 @@ async def send_message_to_model_wrapper(
11251125
query: str,
11261126
system_message: str = "",
11271127
response_type: str = "text",
1128+
deepthought: bool = False,
11281129
user: KhojUser = None,
11291130
query_images: List[str] = None,
11301131
context: str = "",
@@ -1227,6 +1228,7 @@ async def send_message_to_model_wrapper(
12271228
api_key=api_key,
12281229
model=chat_model_name,
12291230
response_type=response_type,
1231+
deepthought=deepthought,
12301232
tracer=tracer,
12311233
)
12321234
elif model_type == ChatModel.ModelType.GOOGLE:
@@ -1425,11 +1427,13 @@ def generate_chat_response(
14251427
)
14261428

14271429
query_to_run = q
1430+
deepthought = False
14281431
if meta_research:
14291432
query_to_run = f"<query>{q}</query>\n<collected_research>\n{meta_research}\n</collected_research>"
14301433
compiled_references = []
14311434
online_results = {}
14321435
code_results = {}
1436+
deepthought = True
14331437

14341438
chat_model = ConversationAdapters.get_valid_chat_model(user, conversation, is_subscribed)
14351439
vision_available = chat_model.vision_enabled
@@ -1513,6 +1517,7 @@ def generate_chat_response(
15131517
generated_files=raw_generated_files,
15141518
generated_asset_results=generated_asset_results,
15151519
program_execution_context=program_execution_context,
1520+
deepthought=deepthought,
15161521
tracer=tracer,
15171522
)
15181523
elif chat_model.model_type == ChatModel.ModelType.GOOGLE:

src/khoj/routers/research.py

+1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ async def apick_next_tool(
9595
query=query,
9696
context=function_planning_prompt,
9797
response_type="json_object",
98+
deepthought=True,
9899
user=user,
99100
query_images=query_images,
100101
query_files=query_files,

src/khoj/utils/constants.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@
4848
"gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
4949
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
5050
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
51-
"claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
5251
"claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},
52+
"claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
53+
"claude-3-5-sonnet-latest": {"input": 3.0, "output": 15.0},
54+
"claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0},
55+
"claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0},
5356
}

0 commit comments

Comments (0)