Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/arrogant-caiman-of-competence.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"stagehand": patch
---

Add a local CUA (computer-use agent) example; remove the root model from types.
15 changes: 6 additions & 9 deletions examples/agent_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,11 @@ async def main():
# Build a unified configuration object for Stagehand
config = StagehandConfig(
env="BROWSERBASE",
# env="LOCAL",
api_key=os.getenv("BROWSERBASE_API_KEY"),
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
model_name="gpt-4o",
self_heal=True,
system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
self_heal=True,
verbose=2,
)

Expand All @@ -51,12 +49,11 @@ async def main():

# Initialize - this creates a new session automatically.
console.print("\n🚀 [info]Initializing Stagehand...[/]")
await stagehand.init()
if stagehand.env == "BROWSERBASE":
console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}")
console.print(
f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]"
)
await stagehand.init()
console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}")
console.print(
f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]"
)

console.print("\n▶️ [highlight] Navigating[/] to Google")
await stagehand.page.goto("https://google.com/")
Expand Down
103 changes: 103 additions & 0 deletions examples/agent_example_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import asyncio
import logging
import os

from dotenv import load_dotenv
from rich.console import Console
from rich.panel import Panel
from rich.theme import Theme

from stagehand import Stagehand, StagehandConfig, configure_logging

# Style names used in the console markup throughout this example
# (e.g. "[info]...[/]"), mapped to their Rich style definitions.
_STYLE_MAP = {
    "info": "cyan",
    "success": "green",
    "warning": "yellow",
    "error": "red bold",
    "highlight": "magenta",
    "url": "blue underline",
}
custom_theme = Theme(_STYLE_MAP)

# Single console shared by the whole script so all output uses the theme.
console = Console(theme=custom_theme)

# Load variables from a .env file before anything below reads the environment.
load_dotenv()

# Set up logging through the stagehand helper: INFO for regular logs
# (switch to logging.DEBUG for detailed output) with dependency noise reduced.
configure_logging(
    quiet_dependencies=True,
    level=logging.INFO,
)

async def main():
    """Run the local computer-use agent example end to end.

    Builds a ``LOCAL`` Stagehand configuration, initializes the client,
    navigates to Google, asks a computer-use agent to play a game of 2048,
    prints a summary of the agent result, and closes the session.

    The session is closed in a ``finally`` clause so that a failure during
    navigation or agent execution does not leave the browser session open.
    """
    # Build a unified configuration object for Stagehand
    config = StagehandConfig(
        env="LOCAL",
        system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
        model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
        self_heal=True,
        verbose=2,
    )

    # Create a Stagehand client using the configuration object.
    stagehand = Stagehand(config)

    # Initialize - this creates a new session automatically.
    console.print("\n🚀 [info]Initializing Stagehand...[/]")
    await stagehand.init()

    try:
        console.print("\n▶️ [highlight] Navigating[/] to Google")
        await stagehand.page.goto("https://google.com/")
        console.print("✅ [success]Navigated to Google[/]")

        console.print("\n▶️ [highlight] Using Agent to perform a task[/]: playing a game of 2048")
        agent = stagehand.agent(
            model="gemini-2.5-computer-use-preview-10-2025",
            instructions="You are a helpful web navigation assistant that helps users find information. You are currently on the following page: google.com. Do not ask follow up questions, the user will trust your judgement.",
            options={"apiKey": os.getenv("GEMINI_API_KEY")},
        )
        agent_result = await agent.execute(
            instruction="Play a game of 2048",
            max_steps=20,
            auto_screenshot=True,
        )

        # Raw dump of the result object, followed by a readable summary.
        console.print(agent_result)

        console.print("📊 [info]Agent execution result:[/]")
        console.print(f"🎯 Completed: [bold]{'Yes' if agent_result.completed else 'No'}[/]")
        if agent_result.message:
            console.print(f"💬 Message: [italic]{agent_result.message}[/]")

        if agent_result.actions:
            console.print(f"🔄 Actions performed: [bold]{len(agent_result.actions)}[/]")
            # Number actions from 1 for display purposes.
            for position, action in enumerate(agent_result.actions, start=1):
                console.print(f" Action {position}: {action.type or 'Unknown'}")

        # For debugging, you can also print the full JSON
        console.print("[dim]Full response JSON:[/]")
        console.print_json(agent_result.model_dump_json())
    finally:
        # Always close the session, even when navigation or the agent raised,
        # so the local browser is not left running.
        console.print("\n⏹️ [warning]Closing session...[/]")
        await stagehand.close()
        console.print("✅ [success]Session closed successfully![/]")

    console.rule("[bold]End of Example[/]")


if __name__ == "__main__":
    # Show a title banner, then hand control to the async entry point.
    banner = Panel(
        "[light_gray]Stagehand 🤘 Agent Example[/]",
        padding=(1, 10),
        border_style="green",
    )
    console.print("\n", banner)
    asyncio.run(main())
5 changes: 1 addition & 4 deletions stagehand/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,10 @@ async def execute(
f"Agent execution finished. Success: {agent_result.completed}. Message: {agent_result.message}",
category="agent",
)
# To clean up pydantic model output
actions_repr = [action.root for action in agent_result.actions]
self.logger.debug(
f"Agent actions: {actions_repr}",
f"Agent actions: {agent_result.actions}",
category="agent",
)
agent_result.actions = actions_repr
return agent_result
else:
agent_config_payload = self.config.model_dump(
Expand Down
Loading