diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
deleted file mode 100644
index cdd3006..0000000
--- a/.github/workflows/claude.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-name: Claude PR Assistant
-
-on:
-  issue_comment:
-    types: [created]
-  pull_request_review_comment:
-    types: [created]
-  issues:
-    types: [opened, assigned]
-  pull_request_review:
-    types: [submitted]
-
-jobs:
-  claude-code-action:
-    if: |
-      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
-      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
-      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
-      (github.event_name == 'issues' && contains(github.event.issue.body, '@claude'))
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      pull-requests: read
-      issues: read
-      id-token: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - name: Run Claude PR Action
-        uses: anthropics/claude-code-action@beta
-        with:
-          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
-          # Or use OAuth token instead:
-          # claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
-          timeout_minutes: 60
-          # mode: tag # Default: responds to @claude mentions
-          # Optional: Restrict network access to specific domains only
-          # experimental_allowed_domains: |
-          #   .anthropic.com
-          #   .github.com
-          #   api.github.com
-          #   .githubusercontent.com
-          #   bun.sh
-          #   registry.npmjs.org
-          #   .blob.core.windows.net
\ No newline at end of file
diff --git a/.github/workflows/openhands-claude.yml b/.github/workflows/openhands-claude.yml
index 93401fa..edb04c6 100644
--- a/.github/workflows/openhands-claude.yml
+++ b/.github/workflows/openhands-claude.yml
@@ -21,10 +21,11 @@ jobs:
   call-openhands-claude:
     uses: All-Hands-AI/OpenHands/.github/workflows/openhands-resolver.yml@main
     with:
+      EVAL_MAX_CHARS: ${{ fromJson(vars.EVAL_MAX_CHARS || 15000) }}
       macro: ${{ vars.OPENHANDS_MACRO || '@openhands-claude' }}
-      max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 50) }}
+      max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 10) }}
       base_container_image: ${{ vars.OPENHANDS_BASE_CONTAINER_IMAGE || '' }}
-      LLM_MODEL: ${{ vars.LLM_MODEL || 'anthropic/claude-sonnet-4-20250514' }}
+      LLM_MODEL: ${{ vars.LLM_MODEL || 'anthropic/claude-3-opus-20240229' }}
       target_branch: ${{ vars.TARGET_BRANCH || 'main' }}
       runner: ${{ vars.TARGET_RUNNER }}
     secrets:
diff --git a/.github/workflows/openhands-gemini.yml b/.github/workflows/openhands-gemini.yml
index 6d277a1..3dca298 100644
--- a/.github/workflows/openhands-gemini.yml
+++ b/.github/workflows/openhands-gemini.yml
@@ -22,9 +22,10 @@ jobs:
     uses: All-Hands-AI/OpenHands/.github/workflows/openhands-resolver.yml@main
     with:
       macro: ${{ vars.OPENHANDS_MACRO || '@openhands-gemini' }}
-      max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 50) }}
+      EVAL_MAX_CHARS: ${{ fromJson(vars.EVAL_MAX_CHARS || 15000) }}
+      max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 10) }}
       base_container_image: ${{ vars.OPENHANDS_BASE_CONTAINER_IMAGE || '' }}
-      LLM_MODEL: ${{ 'gemini/gemini-2.5-pro' }}
+      LLM_MODEL: ${{ 'gemini/gemini-1.5-pro-latest' }}
       target_branch: ${{ vars.TARGET_BRANCH || 'main' }}
       runner: ${{ vars.TARGET_RUNNER }}
     secrets:
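A note on the `fromJson(vars.NAME || default)` idiom used in the workflow hunks above and below: values in the GitHub Actions `vars` context are strings, and an unset repository variable evaluates to the empty string, which is falsy, so `||` falls through to the literal default; `fromJson` then parses the result into the number the reusable resolver workflow expects. A minimal sketch of the override behavior (the `max_iterations` input name is taken from these hunks; the variable values are hypothetical):

```yaml
with:
  # vars.OPENHANDS_MAX_ITER unset  -> '' || 10 -> fromJson -> 10
  # vars.OPENHANDS_MAX_ITER = "25" -> '25'     -> fromJson -> 25
  # Input name from the hunks in this diff; values are examples only.
  max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 10) }}
```

The new defaults (10 iterations, 15000 evaluation characters) presumably bound the resolver's cost per run; restoring the old limits is a repository-variable change rather than a workflow edit.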
diff --git a/.github/workflows/openhands-gpt.yml b/.github/workflows/openhands-gpt.yml
index c671205..f86394d 100644
--- a/.github/workflows/openhands-gpt.yml
+++ b/.github/workflows/openhands-gpt.yml
@@ -22,9 +22,10 @@ jobs:
     uses: All-Hands-AI/OpenHands/.github/workflows/openhands-resolver.yml@main
     with:
       macro: ${{ vars.OPENHANDS_MACRO || '@openhands-gpt' }}
-      max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 50) }}
+      EVAL_MAX_CHARS: ${{ fromJson(vars.EVAL_MAX_CHARS || 15000) }}
+      max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 10) }}
       base_container_image: ${{ vars.OPENHANDS_BASE_CONTAINER_IMAGE || '' }}
-      LLM_MODEL: ${{ 'gpt-5-2025-08-07' }}
+      LLM_MODEL: ${{ 'gpt-4o' }}
       target_branch: ${{ vars.TARGET_BRANCH || 'main' }}
       runner: ${{ vars.TARGET_RUNNER }}
     secrets:
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2a8214c..76df963 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,10 +9,10 @@ on:
 jobs:
   test:
     name: Python ${{ matrix.python-version }}
-    runs-on: macos-latest
+    runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.13']
+        python-version: ['3.12']
 
     # EXPOSE SECRETS AS ENV VARS (job-wide)
     env:
@@ -45,4 +45,4 @@ jobs:
           echo "Test panelist prompt {round} {round_name}" > prompts/panelist.txt
 
       - name: Run tests
-        run: pytest -c pytest.real.ini
\ No newline at end of file
+        run: pytest -c pytest.real.ini
diff --git a/.gitignore b/.gitignore
index 62d89fc..c30e9dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,4 +33,4 @@ sessions/
 # Build
 build/
 dist/
-*.egg-info/
\ No newline at end of file
+*.egg-info/storage
diff --git a/Makefile b/Makefile
deleted file mode 100644
index bb0b438..0000000
--- a/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
-.PHONY: install test clean
-
-install:
-	pip install -r requirements.txt
-
-install-dev:
-	pip install -r requirements.txt
-	pip install -r requirements-test.txt
-
-install-all:
-	pip install -r requirements-dev.txt
-
-test:
-	pytest tests/ -v
-
-test-verbose:
-	pytest tests/ -vv -s
-
-clean:
-	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
-	find . -type f -name "*.pyc" -delete
-	rm -rf .pytest_cache
-	rm -rf .coverage
-	rm -rf htmlcov
-
-run:
-	python main.py
\ No newline at end of file
diff --git a/README.md b/README.md
index a09f1e3..35eb51c 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,24 @@ Run the application:
 python main.py
 ```
 
+
+## Fix an issue using an OpenHands agent
+
+You can also use an OpenHands agent to fix an issue for you. To trigger an agent, leave a comment in the issue thread with the following format:
+
+`@<agent-name> fix this issue`
+
+For example, to use `openhands-claude` to fix an issue, leave a comment like this:
+
+`@openhands-claude fix this issue`
+
+The available agents are:
+- `openhands-claude`
+- `openhands-gemini`
+- `openhands-gpt`
+
+The GitHub Actions workflows for these agents are located in `.github/workflows`.
+
 ## Discussion Structure
 
 1. **Round 0 - Agenda Framing**: Moderator introduces the topic
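The `on:` trigger blocks of the `openhands-*.yml` workflows fall outside the hunks in this diff, so the wiring below is a hypothetical sketch modeled on the deleted `claude.yml` workflow above, not the repository's actual configuration:

```yaml
# Hypothetical sketch; the real trigger block is not shown in this diff.
on:
  issue_comment:
    types: [created]

jobs:
  call-openhands-claude:
    # Gate on the macro, mirroring the '@claude' check in the deleted claude.yml.
    if: contains(github.event.comment.body, '@openhands-claude')
    uses: All-Hands-AI/OpenHands/.github/workflows/openhands-resolver.yml@main
```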
diff --git a/llm/anthropic_client.py b/llm/anthropic_client.py
deleted file mode 100644
index 338580b..0000000
--- a/llm/anthropic_client.py
+++ /dev/null
@@ -1,98 +0,0 @@
-try:
-    import anthropic
-    ANTHROPIC_AVAILABLE = True
-except ImportError:
-    print("Anthropic library not installed. Install with: pip install anthropic")
-    ANTHROPIC_AVAILABLE = False
-
-from typing import List, Dict
-from llm.base import LLMClient, retry_with_backoff
-import asyncio
-import logging
-
-class ClaudeClient(LLMClient):
-    def __init__(self, api_key: str):
-        if not ANTHROPIC_AVAILABLE:
-            raise ImportError("Anthropic library not installed. Run: pip install anthropic")
-
-        if not api_key or api_key == "your_anthropic_api_key_here" or not api_key.startswith("sk-ant-"):
-            raise ValueError("Invalid Anthropic API key. Please check your .env file")
-
-        try:
-            self.client = anthropic.Anthropic(api_key=api_key)
-            logging.info("Anthropic client initialized successfully")
-        except Exception as e:
-            logging.error(f"Failed to initialize Anthropic client: {e}")
-            raise
-
-    async def generate_response(
-        self,
-        system_prompt: str,
-        messages: List[Dict],
-        temperature: float = 0.7,
-        max_tokens: int = 2048
-    ) -> str:
-        async def _generate():
-            try:
-                # Validate inputs
-                if not system_prompt or not system_prompt.strip():
-                    raise ValueError("System prompt cannot be empty")
-
-                if not messages:
-                    raise ValueError("Messages list cannot be empty")
-
-                # Validate message format
-                for i, msg in enumerate(messages):
-                    if not isinstance(msg, dict):
-                        raise ValueError(f"Message {i} must be a dictionary")
-                    if "role" not in msg:
-                        raise ValueError(f"Message {i} missing 'role' field")
-                    if "content" not in msg:
-                        raise ValueError(f"Message {i} missing 'content' field")
-                    if msg["role"] not in ["user", "assistant"]:
-                        raise ValueError(f"Message {i} has invalid role: {msg['role']}")
-                    if not msg["content"] or not msg["content"].strip():
-                        raise ValueError(f"Message {i} has empty content")
-
-                # Log the request parameters for debugging
-                logging.info(f"Making Anthropic API request with:")
-                logging.info(f"  Model: claude-opus-4-1-20250805")
-                logging.info(f"  System prompt length: {len(system_prompt)}")
-                logging.info(f"  Messages count: {len(messages)}")
-                logging.info(f"  Temperature: {temperature}")
-                logging.info(f"  Max tokens: {max_tokens}")
-
-                # Log first few messages for debugging
-                for i, msg in enumerate(messages[:3]):
-                    logging.info(f"  Message {i}: role={msg['role']}, content_length={len(msg['content'])}")
-
-                # Convert to sync call wrapped in async
-                response = await asyncio.to_thread(
-                    self.client.messages.create,
-                    model="claude-opus-4-1-20250805",
-                    system=system_prompt,
-                    messages=messages,
-                    temperature=temperature,
-                    max_tokens=max_tokens
-                )
-
-                if response and response.content:
-                    return response.content[0].text
-                else:
-                    raise ValueError("Empty response from Anthropic API")
-
-            except anthropic.APIError as e:
-                logging.error(f"Anthropic API error: {e}")
-                logging.error(f"Error type: {type(e)}")
-                logging.error(f"Error details: {e.message if hasattr(e, 'message') else 'No message'}")
-                logging.error(f"Error status: {e.status if hasattr(e, 'status') else 'No status'}")
-                logging.error(f"Error response: {e.response if hasattr(e, 'response') else 'No response'}")
-                raise
-            except Exception as e:
-                logging.error(f"Unexpected error calling Anthropic: {e}")
-                logging.error(f"Error type: {type(e)}")
-                import traceback
-                logging.error(f"Traceback: {traceback.format_exc()}")
-                raise
-
-        return await retry_with_backoff(_generate)
diff --git a/llm/base.py b/llm/base.py
deleted file mode 100644
index e4b7a2e..0000000
--- a/llm/base.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import List, Dict
-import asyncio
-import logging
-import traceback
-
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-
-class LLMClient(ABC):
-    @abstractmethod
-    async def generate_response(
-        self,
-        system_prompt: str,
-        messages: List[Dict],
-        temperature: float = 0.7,
-        max_tokens: int = 2048
-    ) -> str:
-        pass
-
-async def retry_with_backoff(
-    func,
-    max_retries: int = 3,
-    base_delay: float = 1.0
-) -> str:
-    """Retry with exponential backoff and better error reporting"""
-    last_error = None
-
-    for attempt in range(max_retries):
-        try:
-            result = await func()
-            if result: # Only return if we got a valid result
-                return result
-            else:
-                raise ValueError("Empty response from API")
-        except Exception as e:
-            last_error = e
-            error_details = traceback.format_exc()
-
-            if attempt == max_retries - 1:
-                logging.error(f"Max retries exceeded. Last error: {e}")
-                logging.error(f"Full traceback: {error_details}")
-                raise e
-
-            delay = base_delay * (2 ** attempt)
-            logging.warning(f"Attempt {attempt + 1} failed: {e}")
-            logging.warning(f"Retrying in {delay}s...")
-            await asyncio.sleep(delay)
-
-    # This should never be reached, but just in case
-    if last_error:
-        raise last_error
-    return ""
diff --git a/llm/google_client.py b/llm/google_client.py
deleted file mode 100644
index d8913ab..0000000
--- a/llm/google_client.py
+++ /dev/null
@@ -1,62 +0,0 @@
-try:
-    import google.generativeai as genai
-    GOOGLE_AVAILABLE = True
-except ImportError:
-    print("Google Generative AI library not installed. Install with: pip install google-generativeai")
-    GOOGLE_AVAILABLE = False
-
-from typing import List, Dict
-from llm.base import LLMClient, retry_with_backoff
-import asyncio
-import logging
-
-class GeminiClient(LLMClient):
-    def __init__(self, api_key: str):
-        if not GOOGLE_AVAILABLE:
-            raise ImportError("Google Generative AI library not installed. Run: pip install google-generativeai")
-
-        if not api_key or api_key == "your_google_api_key_here" or not api_key.startswith("AIza"):
-            raise ValueError("Invalid Google API key. Please check your .env file")
-
-        try:
-            genai.configure(api_key=api_key)
-            self.model = genai.GenerativeModel('gemini-2.5-pro')
-            logging.info("Google Gemini client initialized successfully")
-        except Exception as e:
-            logging.error(f"Failed to initialize Google client: {e}")
-            raise
-
-    async def generate_response(
-        self,
-        system_prompt: str,
-        messages: List[Dict],
-        temperature: float = 0.7,
-        max_tokens: int = 2048
-    ) -> str:
-        async def _generate():
-            try:
-                # Format messages for Gemini
-                formatted_prompt = system_prompt + "\n\n"
-                for msg in messages:
-                    formatted_prompt += f"{msg['role']}: {msg['content']}\n\n"
-
-                # Convert to sync call wrapped in async
-                response = await asyncio.to_thread(
-                    self.model.generate_content,
-                    formatted_prompt,
-                    generation_config=genai.GenerationConfig(
-                        temperature=temperature,
-                        max_output_tokens=max_tokens
-                    )
-                )
-
-                if response and response.text:
-                    return response.text
-                else:
-                    raise ValueError("Empty response from Google API")
-
-            except Exception as e:
-                logging.error(f"Error calling Google Gemini: {e}")
-                raise
-
-        return await retry_with_backoff(_generate)
diff --git a/ui/__init__.py b/ui/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/ui/terminal.py b/ui/terminal.py
deleted file mode 100644
index 073a09d..0000000
--- a/ui/terminal.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import os
-import sys
-import termios
-import tty
-from typing import Optional
-from rich.console import Console
-from rich.panel import Panel
-from rich.table import Table
-from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich.markdown import Markdown
-from rich.prompt import Prompt
-from models.discussion import DiscussionState, Round, Message
-
-class TerminalUI:
-    def __init__(self):
-        self.console = Console()
-        self.round_names = {
-            Round.AGENDA: "Agenda Framing",
-            Round.EVIDENCE: "Evidence Presentation",
-            Round.CROSS_EXAMINATION: "Cross-Examination",
-            Round.CONVERGENCE: "Convergence"
-        }
-        self.participant_colors = {
-            "gpt5": "cyan",
-            "claude": "green",
-            "gemini": "yellow",
-            "claude_moderator": "magenta"
-        }
-
-    def clear_screen(self):
-        os.system('cls' if os.name == 'nt' else 'clear')
-
-    def display_header(self, topic: str, current_round: Round):
-        """Display discussion header"""
-        self.console.print("\n[bold white]═══ ROUNDTABLE ═══[/bold white]\n", justify="center")
-        self.console.print(f"[dim]Topic:[/dim] [bold]{topic}[/bold]\n", justify="center")
-
-        progress_bar = ""
-        for r in Round:
-            if r.value < current_round.value:
-                progress_bar += "●"
-            elif r.value == current_round.value:
-                progress_bar += "◉"
-            else:
-                progress_bar += "○"
-            if r.value < 3:
-                progress_bar += " "
-
-        self.console.print(f"[bold]{self.round_names[current_round]}[/bold]", justify="center")
-        self.console.print(f"[dim]{progress_bar}[/dim]\n", justify="center")
-        self.console.print("─" * 80)
-
-    def display_message(self, message: Message):
-        """Display a single message"""
-        color = self.participant_colors.get(message.participant_id, "white")
-        role_badge = "🎯 MOD" if message.role.value == "moderator" else "💭"
-
-        panel_title = f"{role_badge} {message.participant_model}"
-        content = Markdown(message.content)
-
-        panel = Panel(
-            content,
-            title=panel_title,
-            title_align="left",
-            border_style=color,
-            padding=(1, 2)
-        )
-        self.console.print(panel)
-        self.console.print()
-
-    def display_thinking(self, participant: str):
-        """Show thinking indicator"""
-        color = self.participant_colors.get(participant, "white")
-        self.console.print(f"[{color}]{participant}[/{color}] is formulating response...")
-
-    def display_round_transition(self, from_round: Round, to_round: Round):
-        """Display round transition"""
-        self.console.print("\n" + "="*80)
-        self.console.print(
-            f"[bold green]✓ Completed:[/bold green] {self.round_names[from_round]}"
-        )
-        self.console.print(
-            f"[bold blue]→ Starting:[/bold blue] {self.round_names[to_round]}"
-        )
-        self.console.print("="*80 + "\n")
-
-    def get_topic_input(self) -> str:
-        """Get discussion topic from user"""
-        self.clear_screen()
-        self.console.print("\n[bold white]═══ ROUNDTABLE ═══[/bold white]\n", justify="center")
-        self.console.print("[dim]A Socratic discussion platform for LLMs[/dim]\n", justify="center")
-
-        topic = Prompt.ask("\n[bold cyan]Enter discussion topic[/bold cyan]")
-        return topic
-
-    def display_menu(self) -> str:
-        """Display main menu"""
-        self.clear_screen()
-        self.console.print("\n[bold white]═══ ROUNDTABLE ═══[/bold white]\n", justify="center")
-
-        table = Table(show_header=False, box=None)
-        table.add_column("Option", style="cyan", width=3)
-        table.add_column("Description")
-
-        table.add_row("1", "Start New Discussion")
-        table.add_row("2", "Load Previous Discussion")
-        table.add_row("3", "Exit")
-
-        self.console.print(table)
-        choice = Prompt.ask("\n[bold]Select option[/bold]", choices=["1", "2", "3"])
-        return choice
-
-    def display_final_consensus(self, consensus: str):
-        """Display final consensus"""
-        self.console.print("\n" + "="*80)
-        self.console.print("[bold green]═══ FINAL CONSENSUS ═══[/bold green]", justify="center")
-        self.console.print("="*80 + "\n")
-
-        panel = Panel(
-            Markdown(consensus),
-            border_style="green",
-            padding=(1, 2)
-        )
-        self.console.print(panel)
-
-    def get_single_keypress(self) -> str:
-        """Get a single keypress without requiring Enter"""
-        if os.name == 'nt': # Windows
-            import msvcrt
-            return msvcrt.getch().decode('utf-8').lower()
-        else: # Unix/Linux/macOS
-            fd = sys.stdin.fileno()
-            old_settings = termios.tcgetattr(fd)
-            try:
-                tty.setraw(sys.stdin.fileno())
-                ch = sys.stdin.read(1).lower()
-                return ch
-            finally:
-                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)