Skip to content

Commit e9928d3

Browse files
committed
Eval more models, control randomization & auto read webpage via workflow
- Control auto read webpage via the eval workflow. Prefix the env var with KHOJ_. Default to false, as that is the default that is going to be used in prod going forward.
- Set the OpenAI API key via an input param in manual eval workflow runs.
- Simplify evaluating other chat models available over an OpenAI-compatible API via the eval workflow.
- Mask the input API key as a secret in the workflow.
- Discard unnecessary null setting of env vars.
- Control randomization of samples in the eval workflow. If randomization is turned off, it will take the first SAMPLE_SIZE items from the eval dataset instead of a random collection of SAMPLE_SIZE items.
1 parent 911e1bf commit e9928d3

File tree

2 files changed

+34
-7
lines changed

2 files changed

+34
-7
lines changed

.github/workflows/run_evals.yml

+32-4
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,32 @@ on:
5050
required: false
5151
default: 5
5252
type: number
53+
openai_api_key:
54+
description: 'OpenAI API key'
55+
required: false
56+
default: ''
57+
type: string
5358
openai_base_url:
5459
description: 'Base URL of OpenAI compatible API'
5560
required: false
5661
default: ''
5762
type: string
63+
auto_read_webpage:
64+
description: 'Auto read webpage on online search'
65+
required: false
66+
default: 'false'
67+
type: choice
68+
options:
69+
- 'false'
70+
- 'true'
71+
randomize:
72+
description: 'Randomize the sample of questions'
73+
required: false
74+
default: 'true'
75+
type: choice
76+
options:
77+
- 'false'
78+
- 'true'
5879

5980
jobs:
6081
eval:
@@ -92,7 +113,14 @@ jobs:
92113

93114
- name: Get App Version
94115
id: hatch
95-
run: echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT
116+
run: |
117+
# Mask relevant workflow inputs as secret early
118+
OPENAI_API_KEY=$(jq -r '.inputs.openai_api_key' $GITHUB_EVENT_PATH)
119+
echo ::add-mask::$OPENAI_API_KEY
120+
echo OPENAI_API_KEY="$OPENAI_API_KEY" >> $GITHUB_ENV
121+
122+
# Get app version from hatch
123+
echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT
96124
97125
- name: ⏬️ Install Dependencies
98126
env:
@@ -115,13 +143,13 @@ jobs:
115143
KHOJ_MODE: ${{ matrix.khoj_mode }}
116144
SAMPLE_SIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.sample_size || 200 }}
117145
BATCH_SIZE: "20"
118-
RANDOMIZE: "True"
146+
RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }}
119147
KHOJ_URL: "http://localhost:42110"
120-
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }}
121148
KHOJ_LLM_SEED: "42"
149+
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }}
122150
KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 5 }}
151+
KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }}
123152
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
124-
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
125153
OPENAI_BASE_URL: ${{ github.event_name == 'workflow_dispatch' && inputs.openai_base_url || '' }}
126154
SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }}
127155
OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || ''}}

src/khoj/processor/tools/online_search.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import json
33
import logging
44
import os
5-
import urllib.parse
65
from collections import defaultdict
76
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
87

@@ -33,7 +32,7 @@
3332
GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY")
3433
GOOGLE_SEARCH_ENGINE_ID = os.getenv("GOOGLE_SEARCH_ENGINE_ID")
3534
SERPER_DEV_API_KEY = os.getenv("SERPER_DEV_API_KEY")
36-
AUTO_READ_WEBPAGE = is_env_var_true("AUTO_READ_WEBPAGE")
35+
AUTO_READ_WEBPAGE = is_env_var_true("KHOJ_AUTO_READ_WEBPAGE")
3736
SERPER_DEV_URL = "https://google.serper.dev/search"
3837

3938
JINA_SEARCH_API_URL = "https://s.jina.ai/"
@@ -113,14 +112,14 @@ async def search_online(
113112
search_engine = "Searxng"
114113
search_engines.append((search_engine, search_with_searxng))
115114

116-
logger.info(f"🌐 Searching the Internet for {subqueries}")
117115
if send_status_func:
118116
subqueries_str = "\n- " + "\n- ".join(subqueries)
119117
async for event in send_status_func(f"**Searching the Internet for**: {subqueries_str}"):
120118
yield {ChatEvent.STATUS: event}
121119

122120
response_dict = {}
123121
for search_engine, search_func in search_engines:
122+
logger.info(f"🌐 Searching the Internet with {search_engine} for {subqueries}")
124123
with timer(f"Internet searches with {search_engine} for {subqueries} took", logger):
125124
try:
126125
search_tasks = [search_func(subquery, location) for subquery in subqueries]

0 commit comments

Comments
 (0)