Commit 425f0f3

cubic-dev-ai comments

1 parent 3721276 commit 425f0f3

File tree

7 files changed: +86 additions, -162 deletions

README.md

Lines changed: 8 additions & 2 deletions
@@ -55,7 +55,13 @@ Activate the virtual environment:
 source .venv/bin/activate
 ```
 
-#### Step 3: Configure Environment
+#### Step 3: Install Python Packages
+Install the required Python packages using uv:
+```bash
+uv pip install -r requirements.txt
+```
+
+#### Step 4: Configure Environment
 1. Create a copy of the example environment file:
    - Windows (Command Prompt):
    ```bash
@@ -67,7 +73,7 @@ cp .env.example .env
 ```
 2. Open `.env` in your preferred text editor and add your API keys and other settings
 
-#### Step 4: Enjoy the web-ui
+#### Step 5: Enjoy the web-ui
 1. **Run the WebUI:**
    ```bash
    python webui.py --ip 127.0.0.1 --port 7788

src/browser/browser_compat.py

Lines changed: 11 additions & 0 deletions
@@ -45,6 +45,17 @@ def __init__(self,
         self.save_recording_path = save_recording_path
         self.save_downloads_path = save_downloads_path
         self._extra = kwargs
+
+    def model_dump(self) -> Dict[str, Any]:
+        """Compatibility method for pydantic model_dump"""
+        return {
+            'window_width': self.window_width,
+            'window_height': self.window_height,
+            'trace_path': self.trace_path,
+            'save_recording_path': self.save_recording_path,
+            'save_downloads_path': self.save_downloads_path,
+            **self._extra
+        }
 
 
 class BrowserState:
     """Compatibility shim for BrowserState"""

src/browser/custom_browser.py

Lines changed: 16 additions & 5 deletions
@@ -5,9 +5,19 @@
 from browser_use.browser.profile import BrowserProfile
 import logging
 
-# Chrome args and utils imports need to be updated for browser_use 0.6.0
-# These modules have been restructured in the new version
+# Updated imports for browser_use 0.6.0
+from browser_use.browser.profile import (
+    CHROME_DEFAULT_ARGS,
+    CHROME_HEADLESS_ARGS,
+    CHROME_DOCKER_ARGS,
+    CHROME_DISABLE_SECURITY_ARGS,
+    CHROME_DETERMINISTIC_RENDERING_ARGS,
+    get_display_size,
+    get_window_adjustments,
+)
+from browser_use.config import CONFIG
 from browser_use.utils import time_execution_async
+from .browser_compat import BrowserContextConfig
 import socket
 
 from .custom_context import CustomBrowserContext
@@ -44,13 +54,14 @@ async def _setup_builtin_browser(self, playwright_instance) -> object:
             screen_size = {'width': 1920, 'height': 1080}
             offset_x, offset_y = 0, 0
         else:
-            screen_size = get_screen_resolution()
+            display_size = get_display_size()
+            screen_size = {'width': display_size.width, 'height': display_size.height} if display_size else {'width': 1920, 'height': 1080}
             offset_x, offset_y = get_window_adjustments()
 
         chrome_args = {
             f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
-            *CHROME_ARGS,
-            *(CHROME_DOCKER_ARGS if IN_DOCKER else []),
+            *CHROME_DEFAULT_ARGS,
+            *(CHROME_DOCKER_ARGS if CONFIG.IN_DOCKER else []),
             *(CHROME_HEADLESS_ARGS if self.config.headless else []),
             *(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
             *(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
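
The second hunk renames `CHROME_ARGS` to `CHROME_DEFAULT_ARGS` and moves the Docker check behind `CONFIG.IN_DOCKER`. A standalone sketch of the same composition pattern, using only the imports shown in the first hunk (the port and headless flag are example values, normally read from `self.config`):

```python
# Sketch of composing Chrome launch args with the browser_use 0.6.0 names,
# mirroring the hunk above. Set membership de-duplicates repeated flags.
from browser_use.browser.profile import (
    CHROME_DEFAULT_ARGS,
    CHROME_DOCKER_ARGS,
    CHROME_HEADLESS_ARGS,
)
from browser_use.config import CONFIG

remote_debugging_port = 9222  # example; normally self.config.chrome_remote_debugging_port
headless = True               # example; normally self.config.headless

chrome_args = {
    f'--remote-debugging-port={remote_debugging_port}',
    *CHROME_DEFAULT_ARGS,
    *(CHROME_DOCKER_ARGS if CONFIG.IN_DOCKER else []),
    *(CHROME_HEADLESS_ARGS if headless else []),
}
print(sorted(chrome_args))
```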

src/utils/llm_provider.py

Lines changed: 16 additions & 4 deletions
@@ -100,14 +100,16 @@ def get_llm_model(provider: str, **kwargs) -> BaseChatModel:
         )
 
     # For providers not directly supported by browser-use, use OpenAI-compatible API
-    elif provider in ["grok", "alibaba", "moonshot", "unbound", "siliconflow", "modelscope"]:
+    elif provider in ["grok", "alibaba", "moonshot", "unbound", "siliconflow", "modelscope", "mistral", "ibm"]:
         base_url_map = {
             "grok": os.getenv("GROK_ENDPOINT", "https://api.x.ai/v1"),
             "alibaba": os.getenv("ALIBABA_ENDPOINT", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
             "moonshot": os.getenv("MOONSHOT_ENDPOINT"),
             "unbound": os.getenv("UNBOUND_ENDPOINT", "https://api.getunbound.ai"),
             "siliconflow": os.getenv("SILICONFLOW_ENDPOINT", ""),
-            "modelscope": os.getenv("MODELSCOPE_ENDPOINT", "")
+            "modelscope": os.getenv("MODELSCOPE_ENDPOINT", ""),
+            "mistral": os.getenv("MISTRAL_ENDPOINT", "https://api.mistral.ai/v1"),
+            "ibm": os.getenv("IBM_ENDPOINT", "https://us-south.ml.cloud.ibm.com")
         }
 
         model_defaults = {
@@ -116,19 +118,29 @@ def get_llm_model(provider: str, **kwargs) -> BaseChatModel:
             "moonshot": "moonshot-v1-32k-vision-preview",
             "unbound": "gpt-4o-mini",
             "siliconflow": "Qwen/QwQ-32B",
-            "modelscope": "Qwen/QwQ-32B"
+            "modelscope": "Qwen/QwQ-32B",
+            "mistral": "pixtral-large-latest",
+            "ibm": "ibm/granite-vision-3.1-2b-preview"
         }
 
         base_url = kwargs.get("base_url") or base_url_map[provider]
         if not base_url:
             raise ValueError(f"{provider} endpoint is required")
+
+        # Special handling for IBM which may require project_id in headers
+        extra_headers = {}
+        if provider == "ibm":
+            project_id = kwargs.get("project_id") or os.getenv("IBM_PROJECT_ID")
+            if project_id:
+                extra_headers["X-Project-ID"] = project_id
 
         return ChatOpenAI(
             model=kwargs.get("model_name", model_defaults[provider]),
             temperature=kwargs.get("temperature", 0.2),
             base_url=base_url,
             api_key=api_key,
+            extra_headers=extra_headers if extra_headers else None,
        )
 
     else:
-        raise ValueError(f"Unsupported provider: {provider}. Supported providers: anthropic, openai, google, groq, ollama, azure_openai, deepseek, grok, alibaba, moonshot, unbound, siliconflow, modelscope")
+        raise ValueError(f"Unsupported provider: {provider}. Supported providers: anthropic, openai, google, groq, ollama, azure_openai, deepseek, grok, alibaba, moonshot, unbound, siliconflow, modelscope, mistral, ibm")

src/webui/components/deep_research_agent_tab.py

Lines changed: 23 additions & 16 deletions
@@ -256,20 +256,34 @@ def get_setting(tab: str, key: str, default: Any = None):
 
         # --- 7. Task Finalization ---
         logger.info("Agent task processing finished. Awaiting final result...")
-        final_result_dict = await agent_task  # Get result or raise exception
-        logger.info(f"Agent run completed. Result keys: {final_result_dict.keys() if final_result_dict else 'None'}")
+        final_result_path = await agent_task  # Get result path or raise exception
+        logger.info(f"Agent run completed. Result path: {final_result_path}")
 
-        # Try to get task ID from result if not known before
-        if not running_task_id and final_result_dict and 'task_id' in final_result_dict:
-            running_task_id = final_result_dict['task_id']
+        # Try to get task ID from agent's current state if not known before
+        if not running_task_id and webui_manager.dr_agent.current_task_id:
+            running_task_id = webui_manager.dr_agent.current_task_id
             webui_manager.dr_task_id = running_task_id
             task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
             report_file_path = os.path.join(task_specific_dir, "report.md")
-            logger.info(f"Task ID confirmed from result: {running_task_id}")
+            logger.info(f"Task ID confirmed from agent state: {running_task_id}")
 
         final_ui_update = {}
-        if report_file_path and os.path.exists(report_file_path):
-            logger.info(f"Loading final report from: {report_file_path}")
+
+        # Use the returned report path directly
+        if final_result_path and os.path.exists(final_result_path):
+            logger.info(f"Loading final report from returned path: {final_result_path}")
+            report_content = _read_file_safe(final_result_path)
+            if report_content:
+                final_ui_update[markdown_display_comp] = gr.update(value=report_content)
+                final_ui_update[markdown_download_comp] = gr.File(value=final_result_path,
+                                                                  label=f"Report ({running_task_id or 'research'}.md)",
+                                                                  interactive=True)
+            else:
+                final_ui_update[markdown_display_comp] = gr.update(
+                    value="# Research Complete\n\n*Error reading final report file.*")
+        elif report_file_path and os.path.exists(report_file_path):
+            # Fallback to expected report path if direct path doesn't work
+            logger.info(f"Loading final report from expected path: {report_file_path}")
             report_content = _read_file_safe(report_file_path)
             if report_content:
                 final_ui_update[markdown_display_comp] = gr.update(value=report_content)
@@ -279,15 +293,8 @@ def get_setting(tab: str, key: str, default: Any = None):
             else:
                 final_ui_update[markdown_display_comp] = gr.update(
                     value="# Research Complete\n\n*Error reading final report file.*")
-        elif final_result_dict and 'report' in final_result_dict:
-            logger.info("Using report content directly from agent result.")
-            # If agent directly returns report content
-            final_ui_update[markdown_display_comp] = gr.update(value=final_result_dict['report'])
-            # Cannot offer download if only content is available
-            final_ui_update[markdown_download_comp] = gr.update(value=None, label="Download Research Report",
-                                                                interactive=False)
         else:
-            logger.warning("Final report file not found and not in result dict.")
+            logger.warning("Final report file not found at returned path or expected location.")
             final_ui_update[markdown_display_comp] = gr.update(value="# Research Complete\n\n*Final report not found.*")
 
         yield final_ui_update
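
The finalization logic now resolves the report in a fixed order: the path returned by the agent, then the conventional `<save_dir>/<task_id>/report.md` location, then a "not found" message. A condensed sketch of that resolution order, with `_read_file_safe` assumed to return the file text or `None` on error, as its use above implies:

```python
# Condensed sketch of the report-resolution order implemented above.
import os
from typing import Callable, Optional, Tuple

def resolve_report(
    final_result_path: Optional[str],
    report_file_path: Optional[str],
    read_file_safe: Callable[[str], Optional[str]],
) -> Tuple[Optional[str], Optional[str]]:
    """Return (content, path): agent-returned path first, expected path second."""
    if final_result_path and os.path.exists(final_result_path):
        return read_file_safe(final_result_path), final_result_path
    if report_file_path and os.path.exists(report_file_path):
        return read_file_safe(report_file_path), report_file_path
    return None, None  # caller renders "Final report not found."
```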

tests/test_controller.py

Lines changed: 7 additions & 133 deletions
@@ -1,136 +1,10 @@
-import asyncio
-import pdb
-import sys
-import time
+# Test file for controller functionality
 
-sys.path.append(".")
 
-from dotenv import load_dotenv
+# MCP functionality has been removed from the application.
+# Controller tests related to MCP have been removed since the functionality no longer exists.
+# This file is kept for potential future controller tests.
 
-load_dotenv()
-
-
-async def test_mcp_client():
-    # MCP functionality removed - test disabled
-    print("MCP functionality has been removed from the application")
-    return
-
-    test_server_config = {
-        "mcpServers": {
-            # "markitdown": {
-            #     "command": "docker",
-            #     "args": [
-            #         "run",
-            #         "--rm",
-            #         "-i",
-            #         "markitdown-mcp:latest"
-            #     ]
-            # },
-            "desktop-commander": {
-                "command": "npx",
-                "args": [
-                    "-y",
-                    "@wonderwhy-er/desktop-commander"
-                ]
-            },
-            # "filesystem": {
-            #     "command": "npx",
-            #     "args": [
-            #         "-y",
-            #         "@modelcontextprotocol/server-filesystem",
-            #         "/Users/xxx/ai_workspace",
-            #     ]
-            # },
-        }
-    }
-
-    mcp_tools, mcp_client = await setup_mcp_client_and_tools(test_server_config)
-
-    for tool in mcp_tools:
-        tool_param_model = create_tool_param_model(tool)
-        print(tool.name)
-        print(tool.description)
-        print(tool_param_model.model_json_schema())
-    pdb.set_trace()
-
-
-async def test_controller_with_mcp():
-    # MCP functionality removed - test disabled
-    print("MCP functionality has been removed from the application")
-    return
-    import os
-    from src.controller.custom_controller import CustomController
-    from browser_use.controller.registry.views import ActionModel
-
-    mcp_server_config = {
-        "mcpServers": {
-            # "markitdown": {
-            #     "command": "docker",
-            #     "args": [
-            #         "run",
-            #         "--rm",
-            #         "-i",
-            #         "markitdown-mcp:latest"
-            #     ]
-            # },
-            "desktop-commander": {
-                "command": "npx",
-                "args": [
-                    "-y",
-                    "@wonderwhy-er/desktop-commander"
-                ]
-            },
-            # "filesystem": {
-            #     "command": "npx",
-            #     "args": [
-            #         "-y",
-            #         "@modelcontextprotocol/server-filesystem",
-            #         "/Users/xxx/ai_workspace",
-            #     ]
-            # },
-        }
-    }
-
-    controller = CustomController()
-    await controller.setup_mcp_client(mcp_server_config)
-    action_name = "mcp.desktop-commander.execute_command"
-    action_info = controller.registry.registry.actions[action_name]
-    param_model = action_info.param_model
-    print(param_model.model_json_schema())
-    params = {"command": f"python ./tmp/test.py"
-              }
-    validated_params = param_model(**params)
-    ActionModel_ = controller.registry.create_action_model()
-    # Create ActionModel instance with the validated parameters
-    action_model = ActionModel_(**{action_name: validated_params})
-    result = await controller.act(action_model)
-    result = result.extracted_content
-    print(result)
-    if result and "Command is still running. Use read_output to get more output." in result and "PID" in \
-            result.split("\n")[0]:
-        pid = int(result.split("\n")[0].split("PID")[-1].strip())
-        action_name = "mcp.desktop-commander.read_output"
-        action_info = controller.registry.registry.actions[action_name]
-        param_model = action_info.param_model
-        print(param_model.model_json_schema())
-        params = {"pid": pid}
-        validated_params = param_model(**params)
-        action_model = ActionModel_(**{action_name: validated_params})
-        output_result = ""
-        while True:
-            time.sleep(1)
-            result = await controller.act(action_model)
-            result = result.extracted_content
-            if result:
-                pdb.set_trace()
-                output_result = result
-                break
-        print(output_result)
-        pdb.set_trace()
-    await controller.close_mcp_client()
-    pdb.set_trace()
-
-
-if __name__ == '__main__':
-    # asyncio.run(test_mcp_client())
-    asyncio.run(test_controller_with_mcp())
+def test_placeholder():
+    """Placeholder test to maintain test file structure."""
+    pass

tests/test_llm_api.py

Lines changed: 5 additions & 2 deletions
@@ -68,10 +68,13 @@ def test_llm(config, query, image_path=None, system_message=None):
     messages = []
     if system_message:
         messages.append(SystemMessage(content=system_message))
-    messages.append(UserMessage(content=query))
+
+    # Use create_message_content to handle both text and image content
+    user_content = create_message_content(query, image_path) if image_path else query
+    messages.append(UserMessage(content=user_content))
 
     # Call the LLM
-    ai_msg = llm.ainvoke(messages)
+    ai_msg = llm.invoke(messages)
 
     # Handle different response types
    if hasattr(ai_msg, "reasoning_content"):
