Skip to content

Commit cd21f77

Browse files
committed
ci(hw): Generate error reports if no runner with tags
1 parent a906d01 commit cd21f77

File tree

3 files changed

+356
-12
lines changed

3 files changed

+356
-12
lines changed

.gitlab/scripts/gen_hw_jobs.py

Lines changed: 164 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
import copy
99
import traceback
1010
from pathlib import Path
11+
from typing import Iterable
12+
from urllib.parse import urlencode
13+
import urllib.request
14+
import urllib.error
1115

1216
# Resolve repository root from this script location
1317
SCRIPT_DIR = Path(__file__).resolve().parent
@@ -184,6 +188,109 @@ def parse_list_arg(s: str) -> list[str]:
184188
return [part.strip() for part in txt.split(",") if part.strip()]
185189

186190

191+
def _gitlab_auth_header() -> tuple[str, str]:
192+
"""Return header key and value for GitLab API auth, preferring PRIVATE-TOKEN, then JOB-TOKEN.
193+
194+
Falls back to empty auth if neither is available.
195+
"""
196+
private = os.environ.get("GITLAB_API_TOKEN") or os.environ.get("PRIVATE_TOKEN")
197+
if private:
198+
return ("PRIVATE-TOKEN", private)
199+
job = os.environ.get("CI_JOB_TOKEN")
200+
if job:
201+
return ("JOB-TOKEN", job)
202+
return ("", "")
203+
204+
205+
def _gitlab_api_get(path: str) -> tuple[int, dict | list | None]:
206+
"""Perform a GET to GitLab API v4 and return (status_code, json_obj_or_None).
207+
208+
Uses project-level API base from CI env. Returns (0, None) if base env is missing.
209+
"""
210+
base = os.environ.get("CI_API_V4_URL")
211+
if not base:
212+
return 0, None
213+
url = base.rstrip("/") + "/" + path.lstrip("/")
214+
key, value = _gitlab_auth_header()
215+
req = urllib.request.Request(url)
216+
if key:
217+
req.add_header(key, value)
218+
try:
219+
with urllib.request.urlopen(req, timeout=15) as resp:
220+
status = resp.getcode()
221+
data = resp.read()
222+
try:
223+
obj = json.loads(data.decode("utf-8")) if data else None
224+
except Exception:
225+
obj = None
226+
return status, obj
227+
except urllib.error.HTTPError as e:
228+
try:
229+
body = e.read().decode("utf-8")
230+
except Exception:
231+
body = str(e)
232+
sys.stderr.write(f"[WARN] GitLab API GET {url} failed: {e} body={body}\n")
233+
return e.code, None
234+
except Exception as e:
235+
sys.stderr.write(f"[WARN] GitLab API GET {url} error: {e}\n")
236+
sys.stderr.write(traceback.format_exc() + "\n")
237+
return -1, None
238+
239+
240+
def list_project_runners() -> list[dict]:
    """List runners available to this project, including their tag lists.

    Requires CI vars ``CI_API_V4_URL`` and ``CI_PROJECT_ID`` plus a token
    accepted by the Runners API (see ``_gitlab_auth_header``).

    The project-level listing endpoint returns runner summaries that do not
    carry ``tag_list``; tags are only exposed by the per-runner details
    endpoint. For every usable (not paused / not inactive) runner whose
    summary lacks a ``tag_list``, the details are fetched and the tags merged
    into the returned dict so that tag matching can work.

    Returns:
        A list of runner dicts. An empty list is returned whenever the
        enumeration is unavailable *or incomplete* (missing project id, a
        failed page, or a runner whose tags cannot be resolved). Callers treat
        an empty list as "skip availability checks", which is safer than
        judging jobs against partial data and reporting false missing-runner
        errors.
    """
    project_id = os.environ.get("CI_PROJECT_ID")
    if not project_id:
        return []

    runners: list[dict] = []
    per_page = 100
    page = 1
    while True:
        query = urlencode({"per_page": per_page, "page": page})
        status, obj = _gitlab_api_get(f"projects/{project_id}/runners?{query}")
        if status != 200 or not isinstance(obj, list):
            # Listing may be restricted (e.g. for JOB-TOKEN) or a later page
            # may have failed. A partial list would make jobs served by an
            # unfetched runner look unschedulable, so report "unknown".
            return []
        runners.extend(entry for entry in obj if isinstance(entry, dict))
        if len(obj) < per_page:
            break
        page += 1

    for runner in runners:
        if isinstance(runner.get("tag_list"), list):
            continue  # tags already present in the summary
        if runner.get("paused") is True or runner.get("active") is False:
            continue  # never eligible, so its tags are irrelevant
        runner_id = runner.get("id")
        detail = None
        if runner_id is not None:
            status, detail = _gitlab_api_get(f"runners/{runner_id}")
            if status != 200:
                detail = None
        tags = detail.get("tag_list") if isinstance(detail, dict) else None
        if not isinstance(tags, list):
            sys.stderr.write(
                f"[WARN] Could not resolve tags for runner {runner_id}; "
                "treating runner enumeration as unavailable.\n"
            )
            return []
        runner["tag_list"] = tags
    return runners
265+
266+
267+
def runner_supports_tags(runner: dict, required_tags: Iterable[str]) -> bool:
    """Tell whether *runner* is usable and offers every tag in *required_tags*.

    Only string entries of the runner's ``tag_list`` are considered, compared
    after stripping surrounding whitespace. A runner is rejected when its
    ``tag_list`` is not a list, when it offers no usable tags at all, when
    ``paused`` is ``True`` or when ``active`` is ``False``.
    """
    raw_tags = runner.get("tag_list") or []
    if not isinstance(raw_tags, list):
        return False

    offered: set[str] = set()
    for entry in raw_tags:
        if not isinstance(entry, str):
            continue
        cleaned = entry.strip()
        if cleaned:
            offered.add(cleaned)
    if not offered:
        return False

    # Paused or inactive runners will not pick up jobs.
    if runner.get("paused") is True or runner.get("active") is False:
        return False

    for wanted in required_tags:
        if wanted not in offered:
            return False
    return True
280+
281+
282+
def any_runner_matches(required_tags: Iterable[str], runners: list[dict]) -> bool:
    """Return True as soon as one entry of *runners* supports all required tags.

    Empty/falsy tags are dropped from *required_tags* before matching. A
    runner payload that makes the check raise is treated as a non-match.
    """
    wanted = list(filter(None, required_tags))

    def _is_match(candidate) -> bool:
        try:
            return bool(runner_supports_tags(candidate, wanted))
        except Exception:
            # Be robust to unexpected runner payloads
            return False

    return any(_is_match(candidate) for candidate in runners)
292+
293+
187294
def main():
188295
ap = argparse.ArgumentParser()
189296
ap.add_argument("--chips", required=True, help="Comma-separated or JSON array list of SoCs")
@@ -249,25 +356,72 @@ def main():
249356

250357
# Build child pipeline YAML in deterministic order
251358
jobs_entries = [] # list of (sort_key, job_name, job_dict)
359+
360+
# Discover available runners (best-effort)
361+
available_runners = list_project_runners()
362+
if not available_runners:
363+
print("[WARN] Could not enumerate project runners or none found; skipping runner-tag availability checks.")
364+
365+
# Accumulate all missing-runner groups to emit a single stub job
366+
missing_groups: list[dict] = []
367+
252368
for (chip, tagset, test_type), test_dirs in group_map.items():
253369
tag_list = sorted(tagset)
254370
# Build name suffix excluding the SOC itself to avoid duplication
255371
non_soc_tags = [t for t in tag_list if t != chip]
256372
tag_suffix = "-".join(non_soc_tags) if non_soc_tags else "generic"
257-
job_name = f"hw-{chip}-{test_type}-{tag_suffix}"[:255]
258373

259-
# Clone base job and adjust (preserve key order using deepcopy)
374+
# Determine if any runner can serve this job
375+
can_schedule = True
376+
if available_runners:
377+
can_schedule = any_runner_matches(tag_list, available_runners)
378+
379+
if can_schedule:
380+
job_name = f"hw-{chip}-{test_type}-{tag_suffix}"[:255]
381+
382+
# Clone base job and adjust (preserve key order using deepcopy)
383+
job = copy.deepcopy(base_job)
384+
# Ensure tags include SOC+extras
385+
job["tags"] = tag_list
386+
vars_block = job.get("variables", {})
387+
vars_block["TEST_CHIP"] = chip
388+
vars_block["TEST_TYPE"] = test_type
389+
# Provide list of test directories for this job
390+
vars_block["TEST_LIST"] = "\n".join(sorted(test_dirs))
391+
job["variables"] = vars_block
392+
393+
sort_key = (chip, test_type, tag_suffix)
394+
jobs_entries.append((sort_key, job_name, job))
395+
else:
396+
# Accumulate for a single combined missing-runner job
397+
missing_groups.append(
398+
{
399+
"chip": chip,
400+
"test_type": test_type,
401+
"required_tags": tag_list,
402+
"test_dirs": sorted(test_dirs),
403+
}
404+
)
405+
406+
# If any groups are missing runners, create one combined stub job to emit all JUnit errors
407+
if missing_groups:
408+
job_name = "hw-missing-runners"
260409
job = copy.deepcopy(base_job)
261-
# Ensure tags include SOC+extras
262-
job["tags"] = tag_list
410+
if "tags" in job:
411+
del job["tags"]
412+
job["before_script"] = [
413+
"echo 'No suitable hardware runners found for some groups; generating combined JUnit error stubs.'"
414+
]
263415
vars_block = job.get("variables", {})
264-
vars_block["TEST_CHIP"] = chip
265-
vars_block["TEST_TYPE"] = test_type
266-
# Provide list of test directories for this job
267-
vars_block["TEST_LIST"] = "\n".join(sorted(test_dirs))
416+
# Store as JSON string for the generator script to process
417+
vars_block["MISSING_GROUPS_JSON"] = json.dumps(missing_groups)
268418
job["variables"] = vars_block
269-
270-
sort_key = (chip, test_type, tag_suffix)
419+
job["script"] = [
420+
"python3 .gitlab/scripts/generate_missing_runner_junit.py",
421+
"exit 1",
422+
]
423+
# Ensure it sorts after normal jobs
424+
sort_key = ("zzz", "zzz", "zzz")
271425
jobs_entries.append((sort_key, job_name, job))
272426

273427
# Order jobs by (chip, type, tag_suffix)
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#!/usr/bin/env python3
2+
3+
import os
4+
import sys
5+
import json
6+
from pathlib import Path
7+
import xml.etree.ElementTree as ET
8+
from typing import Optional
9+
10+
11+
def read_env_list(name: str) -> list[str]:
    """Read environment variable *name* as a newline-separated list.

    Each line is stripped of surrounding whitespace; blank lines are dropped.
    An unset variable yields an empty list.
    """
    entries: list[str] = []
    for line in os.environ.get(name, "").splitlines():
        trimmed = line.strip()
        if trimmed:
            entries.append(trimmed)
    return entries
14+
15+
16+
def write_single_suite(out_path: Path, suite_name: str, testcase_name: str, error_message: str) -> None:
    """Write a JUnit XML file holding one test suite with one errored test case.

    Parent directories of *out_path* are created as needed. The suite reports
    ``tests=1, errors=1, failures=0``; the test case uses the class name
    ``hardware.missing_runner`` and carries an ``<error>`` element whose
    ``message`` attribute is *error_message*.
    """
    explanation = (
        "The hardware test could not be scheduled because no runner with the "
        "required tag combination is online/available."
    )

    out_path.parent.mkdir(parents=True, exist_ok=True)

    suite_el = ET.Element("testsuite", name=suite_name, tests="1", errors="1", failures="0")
    case_el = ET.SubElement(suite_el, "testcase", classname="hardware.missing_runner", name=testcase_name)
    error_el = ET.SubElement(case_el, "error", message=error_message)
    error_el.text = explanation

    tree = ET.ElementTree(suite_el)
    tree.write(out_path, encoding="utf-8", xml_declaration=True)
42+
43+
44+
def _leading_spaces_count(s: str) -> int:
45+
return len(s) - len(s.lstrip(" "))
46+
47+
48+
def _manual_parse_fqbn_length(ci_text: str, chip: str) -> Optional[int]:
49+
lines = ci_text.splitlines()
50+
fqbn_idx = None
51+
fqbn_indent = None
52+
for idx, line in enumerate(lines):
53+
if line.strip().startswith("fqbn:"):
54+
fqbn_idx = idx
55+
fqbn_indent = _leading_spaces_count(line)
56+
break
57+
if fqbn_idx is None:
58+
return None
59+
chip_idx = None
60+
chip_indent = None
61+
for j in range(fqbn_idx + 1, len(lines)):
62+
line = lines[j]
63+
if not line.strip():
64+
continue
65+
indent = _leading_spaces_count(line)
66+
if indent <= fqbn_indent:
67+
break
68+
stripped = line.strip()
69+
# Match '<chip>:' at this indentation level
70+
if stripped.startswith(f"{chip}:"):
71+
chip_idx = j
72+
chip_indent = indent
73+
break
74+
if chip_idx is None:
75+
return None
76+
count = 0
77+
for k in range(chip_idx + 1, len(lines)):
78+
line = lines[k]
79+
if not line.strip():
80+
continue
81+
indent = _leading_spaces_count(line)
82+
if indent <= chip_indent:
83+
break
84+
if line.strip().startswith("-"):
85+
count += 1
86+
return count if count > 0 else 1
87+
88+
89+
def detect_fqbn_count(test_dir: Path, chip: str) -> int:
    """Count the FQBN configurations declared for *chip* in ``<test_dir>/ci.yml``.

    PyYAML is used when it can be imported and the file parses cleanly; any
    failure on that path falls back to the indentation-based parser
    ``_manual_parse_fqbn_length``. The result is never lower than 1: a missing
    file, a missing chip entry or a non-list value all count as one
    configuration.
    """
    config_file = test_dir / "ci.yml"
    if not config_file.exists():
        return 1

    try:
        import yaml  # type: ignore

        parsed = yaml.safe_load(config_file.read_text(encoding="utf-8")) or {}
        per_chip = parsed.get("fqbn", {})
        entries = per_chip.get(chip) if isinstance(per_chip, dict) else None
        if isinstance(entries, list):
            return max(len(entries), 1)
        return 1
    except Exception:
        # PyYAML missing or the document is not usable: parse by indentation instead.
        try:
            raw_text = config_file.read_text(encoding="utf-8", errors="ignore")
        except Exception:
            return 1
        manual_count = _manual_parse_fqbn_length(raw_text, chip)
        if manual_count is None:
            return 1
        return manual_count
115+
116+
117+
def _emit_missing_runner_reports(test_dir: str, chip: str, test_type: str, required_tags: str) -> None:
    """Write the JUnit error stub(s) for one test directory.

    One report ``<sketch>.xml`` is written under ``<test_dir>/<chip>/`` when
    the test has at most one FQBN configuration for *chip*; otherwise one
    report per configuration, named ``<sketch>0.xml``, ``<sketch>1.xml``, ...
    """
    sketchdir = Path(test_dir)
    sketchname = sketchdir.name
    msg = f"No available runner matches required tags: {required_tags} (chip={chip})"

    count = detect_fqbn_count(sketchdir, chip)
    suffixes = [""] if count <= 1 else [str(i) for i in range(count)]
    for suffix in suffixes:
        case_name = f"{sketchname}{suffix}"
        out_path = sketchdir / chip / f"{case_name}.xml"
        suite_name = f"{test_type}_hardware_{chip}_{case_name}"
        write_single_suite(out_path, suite_name, case_name, msg)
        print(f"Wrote JUnit error report to {out_path}")


def main() -> int:
    """Generate JUnit error reports for hardware tests that have no matching runner.

    Input comes from the environment:

    * ``MISSING_GROUPS_JSON`` - JSON list of objects with ``chip``,
      ``test_type``, ``required_tags`` and ``test_dirs``. When set, only this
      variable is used.
    * Legacy single-group variables ``TEST_LIST`` (newline separated),
      ``TEST_CHIP``, ``TEST_TYPE`` and ``REQUIRED_TAGS``. With an empty
      ``TEST_LIST`` a generic ``tests/<type>/<chip>/missing_runner.xml`` is
      written so the pipeline still reports an error.

    Returns:
        Always 0; failing the job is left to the calling CI script.
    """
    groups_json = os.environ.get("MISSING_GROUPS_JSON")
    if groups_json:
        try:
            groups = json.loads(groups_json)
        except (ValueError, RecursionError) as exc:
            # Keep the best-effort behaviour, but make the lack of reports explainable.
            sys.stderr.write(f"[WARN] MISSING_GROUPS_JSON is not valid JSON ({exc}); no reports written.\n")
            groups = []
        if not isinstance(groups, list):
            sys.stderr.write("[WARN] MISSING_GROUPS_JSON is not a JSON list; no reports written.\n")
            groups = []

        for group in groups:
            if not isinstance(group, dict):
                continue
            chip = str(group.get("chip", "unknown"))
            test_type = str(group.get("test_type", "unknown"))

            # Accept a single tag or non-string entries without crashing or
            # splitting a string into characters.
            tags = group.get("required_tags") or []
            if not isinstance(tags, (list, tuple)):
                tags = [tags]
            required_tags = " ".join(str(tag) for tag in tags)

            test_dirs = group.get("test_dirs") or []
            if not isinstance(test_dirs, (list, tuple)):
                test_dirs = [test_dirs]
            for test_dir in test_dirs:
                _emit_missing_runner_reports(test_dir, chip, test_type, required_tags)
        return 0

    # Legacy single-group envs
    tests = read_env_list("TEST_LIST")
    chip = os.environ.get("TEST_CHIP", "unknown")
    test_type = os.environ.get("TEST_TYPE", "unknown")
    required_tags = os.environ.get("REQUIRED_TAGS", "").strip()

    if tests:
        for test_dir in tests:
            _emit_missing_runner_reports(test_dir, chip, test_type, required_tags)
    else:
        # Fallback: produce a generic suite so the pipeline reports an error
        out_path = Path("tests") / test_type / chip / "missing_runner.xml"
        suite_name = f"{test_type}_hardware_{chip}_missing"
        msg = f"No available runner matches required tags: {required_tags} (chip={chip})"
        write_single_suite(out_path, suite_name, "missing_runner", msg)
        print(f"Wrote JUnit error report to {out_path}")
    return 0
186+
187+
188+
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
190+
191+

0 commit comments

Comments
 (0)