|
1 | 1 | """ |
2 | | -envs/coding_env/env.py |
3 | | --------------------------------- |
4 | | -Concrete environment implementation using the core BaseEnv. |
5 | | -POC implementation runs code locally via subprocess that can be changed later. |
| 2 | +CodingEnv |
| 3 | +--------- |
| 4 | +Client-side wrapper for the Coding environment server. |
| 5 | +Talks HTTP to a single base_url that exposes two endpoints: /reset and /step. |
| 6 | +
|
| 7 | +- Users instantiate CodingEnv with a base_url provided by the higher-level |
| 8 | + vector/orchestration layer. |
| 9 | +- Environment authors ship the Docker image that serves the HTTP API. |
| 10 | +
|
| 11 | +(Seeds, episode IDs, request IDs, and capabilities can be added later in the payloads.) |
6 | 12 | """ |
7 | 13 |
|
8 | 14 | from __future__ import annotations |
9 | 15 |
|
10 | | -import subprocess |
11 | 16 | from typing import Optional |
12 | 17 |
|
13 | | -from core.base import BaseEnv |
| 18 | +from core.base_env_client import HTTPEnvClient |
14 | 19 | from core.types import StepResult |
15 | 20 |
|
16 | 21 | from .models import CodeAction, CodeObservation |
17 | 22 |
|
18 | 23 |
|
19 | | -class CodingEnv(BaseEnv[CodeAction, CodeObservation]): |
20 | | - """ |
21 | | - Minimal Coding Environment. |
22 | | -
|
23 | | - POC behavior: |
24 | | - - reset(): returns a fresh, empty observation (no persistent state). |
25 | | - - step(action): runs Python code with `python -c` and returns stdout/stderr/exit_code. |
26 | | -
|
27 | | - Future swap: |
28 | | - Replace _run_code_locally() with a call to your Docker/gateway backend without |
29 | | - changing the public API. |
30 | | - """ |
31 | | - |
| 24 | +class CodingEnv(HTTPEnvClient[CodeAction, CodeObservation]): |
32 | 25 | def __init__( |
33 | 26 | self, |
34 | | - default_timeout_s: float = 10.0, |
35 | | - python_executable: str = "python", |
| 27 | + base_url: str, |
| 28 | + request_timeout_s: float = 15.0, |
36 | 29 | ): |
37 | | - """ |
38 | | - Args: |
39 | | - default_timeout_s: Max seconds to allow code execution before timing out. |
40 | | - python_executable: Interpreter to run (e.g., "python3", a venv path, etc.). |
41 | | - """ |
42 | | - self._default_timeout_s = float(default_timeout_s) |
43 | | - self._python = python_executable |
44 | | - |
45 | | - # --- BaseEnv interface --- |
46 | | - |
47 | | - def reset(self) -> CodeObservation: |
48 | | - # No state to clear in this POC; return an initial observation. |
49 | | - return CodeObservation(stdout="", stderr="", exit_code=0) |
50 | | - |
51 | | - def step(self, action: CodeAction) -> StepResult[CodeObservation]: |
52 | | - if not isinstance(action, CodeAction): |
53 | | - raise TypeError(f"Expected CodeAction, got {type(action)!r}") |
54 | | - |
55 | | - # TODO: replace dummy response with the call to the code executor inside the container |
56 | | - obs, timed_out = CodeObservation(stderr="", stdout="", exit_code=0), False |
57 | | - |
58 | | - # Simple reward heuristic: success and no stderr -> 1.0 else 0.0 |
59 | | - reward: Optional[float] = ( |
60 | | - 1.0 if (obs.exit_code == 0 and not obs.stderr) else 0.0 |
| 30 | + super().__init__( |
| 31 | + base_url=base_url, |
| 32 | + request_timeout_s=request_timeout_s, |
61 | 33 | ) |
62 | 34 |
|
63 | | - info = { |
64 | | - "timed_out": timed_out, |
65 | | - "interpreter": self._python, |
| 35 | + # --- HTTPEnvClient abstract hooks --- |
| 36 | + |
| 37 | + def _step_payload(self, action: CodeAction) -> dict: |
| 38 | + # Shape expected by the server's /step endpoint under "action" |
| 39 | + return { |
| 40 | + "code": action.code, |
66 | 41 | } |
67 | 42 |
|
| 43 | + def _parse_result(self, payload: dict) -> StepResult[CodeObservation]: |
| 44 | + # Expecting: { "observation": {...}, "reward": <float|null>, "done": <bool>, "info": {...} } |
| 45 | + obs = CodeObservation(**payload["observation"]) |
68 | 46 | return StepResult( |
69 | 47 | observation=obs, |
70 | | - reward=reward, |
71 | | - done=False, # Coding env is not episodic by default |
| 48 | + reward=payload.get("reward"), |
| 49 | + done=bool(payload.get("done", False)), |
72 | 50 | ) |
0 commit comments