|
38 | 38 | JINA_SEARCH_API_URL = "https://s.jina.ai/"
|
39 | 39 | JINA_API_KEY = os.getenv("JINA_API_KEY")
|
40 | 40 |
|
| 41 | +FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY") |
41 | 42 | FIRECRAWL_USE_LLM_EXTRACT = is_env_var_true("FIRECRAWL_USE_LLM_EXTRACT")
|
42 | 43 |
|
43 | 44 | OLOSTEP_QUERY_PARAMS = {
|
@@ -102,6 +103,9 @@ async def search_online(
|
102 | 103 | if GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID:
|
103 | 104 | search_engine = "Google"
|
104 | 105 | search_engines.append((search_engine, search_with_google))
|
| 106 | + if FIRECRAWL_API_KEY: |
| 107 | + search_engine = "Firecrawl" |
| 108 | + search_engines.append((search_engine, search_with_firecrawl)) |
105 | 109 | if JINA_API_KEY:
|
106 | 110 | search_engine = "Jina"
|
107 | 111 | search_engines.append((search_engine, search_with_jina))
|
@@ -167,6 +171,69 @@ async def search_online(
|
167 | 171 | yield response_dict
|
168 | 172 |
|
169 | 173 |
|
async def search_with_firecrawl(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
    """
    Search using Firecrawl API.

    Sends a single POST request to the Firecrawl v1 search endpoint and reshapes
    each hit into a dict with "title", "link", "snippet" and "content" keys,
    collected under the "organic" key of the returned dictionary.

    Args:
        query: The search query string
        location: Location data for geolocation-based search

    Returns:
        Tuple containing the original query and a dictionary of search results.
        The dictionary is empty when the request fails, the API reports
        failure, or an unexpected error occurs; errors are logged, not raised.
    """
    # Set up API endpoint and headers
    firecrawl_api_url = "https://api.firecrawl.dev/v1/search"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {FIRECRAWL_API_KEY}"}

    # Prepare request payload
    country_code = location.country_code.lower() if location and location.country_code else "us"
    payload = {
        "query": query,
        "limit": 10,  # Maximum number of results
        "country": country_code,
        "lang": "en",
        "timeout": 10000,
        "scrapeOptions": {},
    }

    # Add location parameter if available. Only the parts that are actually set
    # are joined, so a missing region or country never shows up as "None".
    if location and location.city:
        location_parts = (location.city, location.region, location.country)
        payload["location"] = ", ".join(str(part) for part in location_parts if part)

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(firecrawl_api_url, headers=headers, json=payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Firecrawl search failed: {error_text}")
                    return query, {}

                response_json = await response.json()

                if not response_json.get("success", False):
                    # NOTE(review): failure responses appear to carry the reason under
                    # "error"; "warning" is kept as a fallback - confirm against the API docs.
                    reason = response_json.get("error") or response_json.get("warning") or "Unknown error"
                    logger.error(f"Firecrawl search failed: {reason}")
                    return query, {}

                # Transform Firecrawl response to match the expected format.
                # Fields are read defensively so one incomplete hit cannot raise
                # and cause the handler below to throw away every other result.
                organic_results = [
                    {
                        "title": item.get("title"),
                        "link": item["url"],
                        "snippet": item.get("description"),
                        "content": item.get("markdown"),
                    }
                    for item in response_json.get("data") or []  # tolerate "data": null
                    if isinstance(item, dict) and item.get("url")  # a hit without a URL is unusable
                ]

                return query, {"organic": organic_results}

        except Exception as e:
            # Best-effort search: log and report no results rather than failing the caller.
            logger.error(f"Error searching with Firecrawl: {str(e)}")
            return query, {}
| 235 | + |
| 236 | + |
170 | 237 | async def search_with_searxng(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
|
171 | 238 | """Search using local SearXNG instance."""
|
172 | 239 | # Use environment variable or default to localhost
|
|
0 commit comments