Skip to content

Commit 95af407

Browse files
authored
Merge pull request #4 from facebookexternal/env_code
Add BaseHTTPClient to talk to Envs via JSON over RPC.
2 parents 1b6e3ff + 7db1092 commit 95af407

File tree

4 files changed

+121
-50
lines changed

4 files changed

+121
-50
lines changed

src/core/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,7 @@ def step(self, action: ActT) -> StepResult[ObsT]:
4343
StepResult[ObsT]: The resulting observation, reward, done flag, and info.
4444
"""
4545
raise NotImplementedError
46+
47+
def close(self) -> None:
    """Release resources (containers, sessions, etc.).

    The base implementation does nothing; it exists as a hook that
    subclasses holding resources can override.
    """
    pass

src/core/base_env_client.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""
2+
core/base_env_client.py
3+
Minimal HTTP-based environment client.
4+
- Talks to a single env worker exposing: POST /reset, POST /step
5+
6+
Future hooks (commented below) for:
7+
- episode_id, seed on reset
8+
- request_id on step
9+
- custom headers (auth/trace)
10+
"""
11+
12+
from __future__ import annotations
13+
14+
from abc import abstractmethod
15+
from typing import Any, Dict, Generic, Optional, TypeVar
16+
17+
import requests
18+
19+
from .base import BaseEnv
20+
from .types import StepResult
21+
22+
ActT = TypeVar("ActT")
23+
ObsT = TypeVar("ObsT")
24+
25+
26+
class HTTPEnvClient(BaseEnv[ActT, ObsT], Generic[ActT, ObsT]):
    """Minimal HTTP client for a single environment worker.

    The worker is expected to expose ``POST /reset`` and ``POST /step``
    under ``base_url`` and to answer with JSON. Subclasses provide the two
    conversion hooks, ``_step_payload`` (action -> JSON body) and
    ``_parse_result`` (JSON response -> ``StepResult``).
    """

    def __init__(
        self,
        base_url: str,
        request_timeout_s: float = 15.0,
        default_headers: Optional[Dict[str, str]] = None,
    ):
        """Create a client bound to one env worker.

        Args:
            base_url: Root URL of the worker; trailing slashes are stripped.
            request_timeout_s: Timeout, in seconds, applied to every request.
            default_headers: Headers sent with every request (e.g. auth/trace).
        """
        self._base = base_url.rstrip("/")
        self._timeout = float(request_timeout_s)
        self._http = requests.Session()
        self._headers = default_headers or {}

    @abstractmethod
    def _step_payload(self, action: ActT) -> dict:
        """Convert an Action object to the JSON body expected by the env server."""
        raise NotImplementedError

    @abstractmethod
    def _parse_result(self, payload: dict) -> StepResult[ObsT]:
        """Convert a JSON response from the env server to StepResult[ObsT]."""
        raise NotImplementedError

    def _post_json(self, route: str, body: Dict[str, Any]) -> Any:
        """POST ``body`` as JSON to ``{base}/{route}`` and return the decoded reply.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        response = self._http.post(
            f"{self._base}/{route}",
            json=body,
            headers=self._headers,
            timeout=self._timeout,
        )
        response.raise_for_status()
        return response.json()

    # ---------- BaseEnv ----------
    def reset(self) -> ObsT:
        """Reset the remote environment and return its initial observation."""
        body: Dict[str, Any] = {}
        # TODO: later:
        # body["seed"] = seed
        # body["episode_id"] = episode_id
        return self._parse_result(self._post_json("reset", body)).observation

    def step(self, action: ActT) -> StepResult[ObsT]:
        """Send ``action`` to the remote environment and return the step result."""
        body: Dict[str, Any] = {
            "action": self._step_payload(action),
            # NOTE(review): int() truncates, so a sub-second timeout is sent
            # as 0 -- confirm how the server interprets timeout_s == 0.
            "timeout_s": int(self._timeout),
        }
        # TODO: later:
        # body["request_id"] = str(uuid.uuid4())
        # body["episode_id"] = current_episode_id
        return self._parse_result(self._post_json("step", body))

    def close(self) -> None:
        """Close the HTTP session owned by this client.

        Only the client-side session (and its pooled connections) is
        released; higher-level libraries own the lifecycles of the endpoints.
        """
        self._http.close()

src/envs/coding_env/env.py

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,50 @@
11
"""
2-
envs/coding_env/env.py
3-
--------------------------------
4-
Concrete environment implementation using the core BaseEnv.
5-
POC implementation runs code locally via subprocess that can be changed later.
2+
CodingEnv
3+
---------
4+
Client-side wrapper for the Coding environment server.
5+
Talks HTTP to a single base_url exposing: /reset and /step.
6+
7+
- Users instantiate CodingEnv with a base_url provided by the higher-level
8+
vector/orchestration layer.
9+
- Environment authors ship the Docker image that serves the HTTP API.
10+
11+
(Seeds, episode IDs, request IDs, capabilities can be added later in the payloads.)
612
"""
713

814
from __future__ import annotations
915

10-
import subprocess
1116
from typing import Optional
1217

13-
from core.base import BaseEnv
18+
from core.base_env_client import HTTPEnvClient
1419
from core.types import StepResult
1520

1621
from .models import CodeAction, CodeObservation
1722

1823

19-
class CodingEnv(BaseEnv[CodeAction, CodeObservation]):
20-
"""
21-
Minimal Coding Environment.
22-
23-
POC behavior:
24-
- reset(): returns a fresh, empty observation (no persistent state).
25-
- step(action): runs Python code with `python -c` and returns stdout/stderr/exit_code.
26-
27-
Future swap:
28-
Replace _run_code_locally() with a call to your Docker/gateway backend without
29-
changing the public API.
30-
"""
31-
24+
class CodingEnv(HTTPEnvClient[CodeAction, CodeObservation]):
3225
def __init__(
3326
self,
34-
default_timeout_s: float = 10.0,
35-
python_executable: str = "python",
27+
base_url: str,
28+
request_timeout_s: float = 15.0,
3629
):
37-
"""
38-
Args:
39-
default_timeout_s: Max seconds to allow code execution before timing out.
40-
python_executable: Interpreter to run (e.g., "python3", a venv path, etc.).
41-
"""
42-
self._default_timeout_s = float(default_timeout_s)
43-
self._python = python_executable
44-
45-
# --- BaseEnv interface ---
46-
47-
def reset(self) -> CodeObservation:
48-
# No state to clear in this POC; return an initial observation.
49-
return CodeObservation(stdout="", stderr="", exit_code=0)
50-
51-
def step(self, action: CodeAction) -> StepResult[CodeObservation]:
52-
if not isinstance(action, CodeAction):
53-
raise TypeError(f"Expected CodeAction, got {type(action)!r}")
54-
55-
# TODO: replace dummy response with the call to the code executor inside the container
56-
obs, timed_out = CodeObservation(stderr="", stdout="", exit_code=0), False
57-
58-
# Simple reward heuristic: success and no stderr -> 1.0 else 0.0
59-
reward: Optional[float] = (
60-
1.0 if (obs.exit_code == 0 and not obs.stderr) else 0.0
30+
super().__init__(
31+
base_url=base_url,
32+
request_timeout_s=request_timeout_s,
6133
)
6234

63-
info = {
64-
"timed_out": timed_out,
65-
"interpreter": self._python,
35+
# --- HTTPEnvClient abstract hooks ---
36+
37+
def _step_payload(self, action: CodeAction) -> dict:
38+
# Shape expected by the server's /step endpoint under "action"
39+
return {
40+
"code": action.code,
6641
}
6742

43+
def _parse_result(self, payload: dict) -> StepResult[CodeObservation]:
44+
# Expecting: { "observation": {...}, "reward": <float|null>, "done": <bool>, "info": {...} }
45+
obs = CodeObservation(**payload["observation"])
6846
return StepResult(
6947
observation=obs,
70-
reward=reward,
71-
done=False, # Coding env is not episodic by default
48+
reward=payload.get("reward"),
49+
done=bool(payload.get("done", False)),
7250
)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from envs.coding_env.env import CodingEnv
2+
from envs.coding_env.models import CodeAction
3+
4+
# Smoke example: expects a Coding env server listening on localhost:8080.
env = CodingEnv(base_url="http://localhost:8080")
try:
    obs0 = env.reset()
    result = env.step(CodeAction(code="print('hi')"))
    print(result.observation.stdout.strip(), result.reward)
finally:
    # Always release the client, even if reset/step raised.
    env.close()

0 commit comments

Comments
 (0)