Skip to content

Commit d62dd4e

Browse files
committed
Support Firecrawl as an online search provider
1 parent 3939e99 commit d62dd4e

File tree

1 file changed

+67
-0
lines changed

1 file changed

+67
-0
lines changed

src/khoj/processor/tools/online_search.py

+67
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
JINA_SEARCH_API_URL = "https://s.jina.ai/"
3939
JINA_API_KEY = os.getenv("JINA_API_KEY")
4040

41+
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
4142
FIRECRAWL_USE_LLM_EXTRACT = is_env_var_true("FIRECRAWL_USE_LLM_EXTRACT")
4243

4344
OLOSTEP_QUERY_PARAMS = {
@@ -102,6 +103,9 @@ async def search_online(
102103
if GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID:
103104
search_engine = "Google"
104105
search_engines.append((search_engine, search_with_google))
106+
if FIRECRAWL_API_KEY:
107+
search_engine = "Firecrawl"
108+
search_engines.append((search_engine, search_with_firecrawl))
105109
if JINA_API_KEY:
106110
search_engine = "Jina"
107111
search_engines.append((search_engine, search_with_jina))
@@ -167,6 +171,69 @@ async def search_online(
167171
yield response_dict
168172

169173

174+
async def search_with_firecrawl(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
    """
    Search using Firecrawl API.

    Args:
        query: The search query string
        location: Location data for geolocation-based search. May be falsy, in which
            case the country defaults to "us" and no location hint is sent.

    Returns:
        Tuple containing the original query and a dictionary of search results.
        On success the dictionary has a single "organic" key holding a list of
        {"title", "link", "snippet", "content"} dicts. On a non-200 response, an
        unsuccessful API response, or any exception, the dictionary is empty.
    """
    # Set up API endpoint and headers
    firecrawl_api_url = "https://api.firecrawl.dev/v1/search"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {FIRECRAWL_API_KEY}"}

    # Prepare request payload
    country_code = location.country_code.lower() if location and location.country_code else "us"
    payload = {
        "query": query,
        "limit": 10,  # Maximum number of results
        "country": country_code,
        "lang": "en",
        "timeout": 10000,  # NOTE(review): presumably milliseconds; confirm against the Firecrawl API docs
        "scrapeOptions": {},
    }

    # Add location parameter if available. Only include the parts that are set,
    # so a missing region or country does not leak a literal "None" into the request.
    if location and location.city:
        location_parts = [location.city, location.region, location.country]
        payload["location"] = ", ".join(str(part) for part in location_parts if part)

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(firecrawl_api_url, headers=headers, json=payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Firecrawl search failed: {error_text}")
                    return query, {}

                response_json = await response.json()

                if not response_json.get("success", False):
                    # Prefer an explicit error message, fall back to the warning field
                    failure_reason = response_json.get("error") or response_json.get("warning") or "Unknown error"
                    logger.error(f"Firecrawl search failed: {failure_reason}")
                    return query, {}

                # Transform Firecrawl response to match the expected format.
                # Use tolerant lookups so one partial result cannot raise a KeyError
                # that would discard every other result via the handler below.
                organic_results = []
                for item in response_json.get("data") or []:
                    if not isinstance(item, dict) or not item.get("url"):
                        # A result without a link cannot be referenced or read; skip it
                        continue
                    organic_results.append(
                        {
                            "title": item.get("title") or "",
                            "link": item["url"],
                            "snippet": item.get("description") or "",
                            "content": item.get("markdown", None),
                        }
                    )

                return query, {"organic": organic_results}

        except Exception as e:
            # Best-effort provider: log with traceback and return no results instead of raising
            logger.error(f"Error searching with Firecrawl: {str(e)}", exc_info=True)
            return query, {}
235+
236+
170237
async def search_with_searxng(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
171238
"""Search using local SearXNG instance."""
172239
# Use environment variable or default to localhost

0 commit comments

Comments
 (0)