Skip to content

Commit f1c097c

Browse files
committed
Only show GitHub check annotations on changed doc paragraphs
1 parent dc8fdf5 commit f1c097c

File tree

2 files changed

+155
-28
lines changed

2 files changed

+155
-28
lines changed

.github/workflows/reusable-docs.yml

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ jobs:
1818
timeout-minutes: 60
1919
steps:
2020
- uses: actions/checkout@v3
21+
with:
22+
fetch-depth: 0
2123
- name: 'Set up Python'
2224
uses: actions/setup-python@v4
2325
with:
@@ -28,13 +30,6 @@ jobs:
2830
run: make -C Doc/ venv
2931

3032
# To annotate PRs with Sphinx nitpicks (missing references)
31-
- name: 'Get list of changed files'
32-
if: github.event_name == 'pull_request'
33-
id: changed_files
34-
uses: Ana06/[email protected]
35-
with:
36-
filter: "Doc/**"
37-
format: csv # works for paths with spaces
3833
- name: 'Build HTML documentation'
3934
continue-on-error: true
4035
run: |
@@ -45,7 +40,7 @@ jobs:
4540
if: github.event_name == 'pull_request'
4641
run: |
4742
python Doc/tools/check-warnings.py \
48-
--check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \
43+
--check-and-annotate-changed 'origin/${{ github.base_ref }}' '${{ github.sha }}' \
4944
--fail-if-regression \
5045
--fail-if-improved
5146

Doc/tools/check-warnings.py

Lines changed: 152 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
Check the output of running Sphinx in nit-picky mode (missing references).
44
"""
55
import argparse
6-
import csv
6+
import itertools
77
import os
88
import re
9+
import subprocess
910
import sys
1011
from pathlib import Path
12+
from typing import TextIO
1113

1214
# Exclude these whether they're dirty or clean,
1315
# because they trigger a rebuild of dirty files.
@@ -24,28 +26,157 @@
2426
"venv",
2527
}
2628

27-
PATTERN = re.compile(r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)")
29+
# Regex pattern to match the parts of a Sphinx warning
30+
WARNING_PATTERN = re.compile(
31+
r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
32+
)
2833

34+
# Regex pattern to match the line numbers in a Git unified diff
35+
DIFF_PATTERN = re.compile(
36+
r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
37+
flags=re.MULTILINE,
38+
)
2939

30-
def check_and_annotate(warnings: list[str], files_to_check: str) -> None:
40+
41+
def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
42+
"""List the files changed between two Gif refs, filtered by change type."""
43+
added_files_result = subprocess.run(
44+
[
45+
"git",
46+
"diff",
47+
f"--diff-filter={filter_mode}",
48+
"--name-only",
49+
f"{ref_a}...{ref_b}",
50+
"--",
51+
],
52+
stdout=subprocess.PIPE,
53+
check=True,
54+
text=True,
55+
)
56+
57+
added_files = added_files_result.stdout.strip().split("\n")
58+
return {Path(file.strip()) for file in added_files if file.strip()}
59+
60+
61+
def get_diff_lines(ref_a: str, ref_b: str, file: os.PathLike) -> list[int]:
62+
"""List the lines changed between two Gif refs for a specific file."""
63+
diff_output = subprocess.run(
64+
[
65+
"git",
66+
"diff",
67+
"--unified=0",
68+
"--diff-filter=M",
69+
f"{ref_a}...{ref_b}",
70+
"--",
71+
str(file),
72+
],
73+
stdout=subprocess.PIPE,
74+
check=True,
75+
text=True,
76+
)
77+
78+
line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
79+
line_match_values = [
80+
line_match.groupdict(default=1) for line_match in line_matches
81+
]
82+
line_ints = [
83+
(int(match_value["lineb"]), int(match_value["added"]))
84+
for match_value in line_match_values
85+
]
86+
line_ranges = [
87+
range(line_b, line_b + added) for line_b, added in line_ints
88+
]
89+
line_numbers = list(itertools.chain(*line_ranges))
90+
91+
return line_numbers
92+
93+
94+
def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
95+
"""Get the line numbers of text in a file object, grouped by paragraph."""
96+
paragraphs = []
97+
prev_line = None
98+
for lineno, line in enumerate(file_obj):
99+
lineno = lineno + 1
100+
if prev_line is None or (line.strip() and not prev_line.strip()):
101+
paragraph = [lineno - 1]
102+
paragraphs.append(paragraph)
103+
paragraph.append(lineno)
104+
prev_line = line
105+
return paragraphs
106+
107+
108+
def process_touched_warnings(
109+
warnings: list[str], ref_a: str, ref_b: str
110+
) -> list[re.Match[str]]:
111+
"""Filter a list of Sphinx warnings to those affecting touched lines."""
112+
added_files, modified_files = tuple(
113+
get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
114+
)
115+
warnings_added, warnings_modified = tuple(
116+
[
117+
WARNING_PATTERN.fullmatch(warning.strip())
118+
for warning in warnings
119+
if any(str(file) in warning for file in files)
120+
]
121+
for files in (added_files, modified_files)
122+
)
123+
warnings_added, warnings_modified = tuple(
124+
[warning for warning in warning_matches if warning]
125+
for warning_matches in (warnings_added, warnings_modified)
126+
)
127+
128+
modified_files_warned = {
129+
file
130+
for file in modified_files
131+
if any(str(file) in warning["file"] for warning in warnings_modified)
132+
}
133+
134+
warnings_touched = warnings_added.copy()
135+
for file in modified_files_warned:
136+
diff_lines = get_diff_lines(ref_a, ref_b, file)
137+
with file.open(encoding="UTF-8") as file_obj:
138+
paragraphs = get_para_line_numbers(file_obj)
139+
touched_paras = [
140+
para_lines
141+
for para_lines in paragraphs
142+
if set(diff_lines) & set(para_lines)
143+
]
144+
touched_para_lines = set(itertools.chain(*touched_paras))
145+
warnings_infile = [
146+
warning
147+
for warning in warnings_modified
148+
if str(file) in warning["file"]
149+
]
150+
warnings_touched += [
151+
warning
152+
for warning in warnings_infile
153+
if int(warning["line"]) in touched_para_lines
154+
]
155+
156+
return warnings_touched
157+
158+
159+
def check_and_annotate_changed(
160+
warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
161+
) -> None:
31162
"""
32-
Convert Sphinx warning messages to GitHub Actions.
163+
Convert Sphinx warning messages to GitHub Actions for changed paragraphs.
33164
34165
Converts lines like:
35166
.../Doc/library/cgi.rst:98: WARNING: reference target not found
36167
to:
37168
::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
38169
39-
Non-matching lines are echoed unchanged.
40-
41-
see:
170+
See:
42171
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
43172
"""
44-
files_to_check = next(csv.reader([files_to_check]))
45-
for warning in warnings:
46-
if any(filename in warning for filename in files_to_check):
47-
if match := PATTERN.fullmatch(warning):
48-
print("::warning file={file},line={line}::{msg}".format_map(match))
173+
warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
174+
print("Emitting doc warnings matching modified lines:")
175+
for warning in warnings_touched:
176+
print(warning[0])
177+
print("::warning file={file},line={line}::{msg}".format_map(warning))
178+
if not warnings_touched:
179+
print("None")
49180

50181

51182
def fail_if_regression(
@@ -68,7 +199,7 @@ def fail_if_regression(
68199
print(filename)
69200
for warning in warnings:
70201
if filename in warning:
71-
if match := PATTERN.fullmatch(warning):
202+
if match := WARNING_PATTERN.fullmatch(warning):
72203
print(" {line}: {msg}".format_map(match))
73204
return -1
74205
return 0
@@ -94,9 +225,10 @@ def fail_if_improved(
94225
def main() -> int:
95226
parser = argparse.ArgumentParser()
96227
parser.add_argument(
97-
"--check-and-annotate",
98-
help="Comma-separated list of files to check, "
99-
"and annotate those with warnings on GitHub Actions",
228+
"--check-and-annotate-changed",
229+
nargs=2,
230+
help="Annotate lines changed between two refs "
231+
"with warnings on GitHub Actions",
100232
)
101233
parser.add_argument(
102234
"--fail-if-regression",
@@ -114,7 +246,7 @@ def main() -> int:
114246
wrong_directory_msg = "Must run this script from the repo root"
115247
assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg
116248

117-
with Path("Doc/sphinx-warnings.txt").open() as f:
249+
with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
118250
warnings = f.read().splitlines()
119251

120252
cwd = str(Path.cwd()) + os.path.sep
@@ -124,15 +256,15 @@ def main() -> int:
124256
if "Doc/" in warning
125257
}
126258

127-
with Path("Doc/tools/.nitignore").open() as clean_files:
259+
with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
128260
files_with_expected_nits = {
129261
filename.strip()
130262
for filename in clean_files
131263
if filename.strip() and not filename.startswith("#")
132264
}
133265

134-
if args.check_and_annotate:
135-
check_and_annotate(warnings, args.check_and_annotate)
266+
if args.check_and_annotate_changed:
267+
check_and_annotate_changed(warnings, *args.check_and_annotate_changed)
136268

137269
if args.fail_if_regression:
138270
exit_code += fail_if_regression(

0 commit comments

Comments
 (0)