Skip to content

Commit 40748ea

Browse files
authored
uses legacy approach (pure python) to parsing a diff str when pygit2 fails. added unit test to notify when bug is fixed prints an ERROR and a WARNING log message in users' CI runs when legacy approach was taken
1 parent 3104f79 commit 40748ea

File tree

4 files changed

+167
-2
lines changed

4 files changed

+167
-2
lines changed

cpp_linter/git.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
GIT_STATUS_INDEX_NEW,
1616
GIT_STATUS_INDEX_MODIFIED,
1717
GIT_STATUS_INDEX_RENAMED,
18+
GitError,
1819
)
1920
from . import logger, CACHE_PATH, FileObj
21+
from .git_str import parse_diff as legacy_parse_diff
2022

2123

2224
def get_sha(repo: Repository, parent: Optional[int] = None) -> GitObject:
@@ -86,7 +88,11 @@ def parse_diff(diff_obj: Union[Diff, str]) -> List[FileObj]:
8688
"""
8789
file_objects: List[FileObj] = []
8890
if isinstance(diff_obj, str):
89-
diff_obj = Diff.parse_diff(diff_obj)
91+
try:
92+
diff_obj = Diff.parse_diff(diff_obj)
93+
except GitError as exc:
94+
logger.warning(f"pygit2.Diff.parse_diff() threw {exc}")
95+
return legacy_parse_diff(diff_obj)
9096
for patch in diff_obj:
9197
if patch.delta.status not in ADDITIVE_STATUS:
9298
continue

cpp_linter/git_str.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""This was reintroduced to deal with any bugs in pygit2 (or the libgit2 C library it
2+
binds to). The `parse_diff()` function here is only used when
3+
`pygit2.Diff.parse_diff()` function fails in `cpp_linter.git.parse_diff()`"""
4+
import re
5+
from typing import Optional, List, Tuple, cast
6+
from . import FileObj, logger
7+
8+
9+
DIFF_FILE_DELIMITER = re.compile(r"^diff --git a/.*$", re.MULTILINE)
10+
DIFF_FILE_NAME = re.compile(r"^\+\+\+\sb?/(.*)$", re.MULTILINE)
11+
DIFF_RENAMED_FILE = re.compile(r"^rename to (.*)$", re.MULTILINE)
12+
DIFF_BINARY_FILE = re.compile(r"^Binary\sfiles\s", re.MULTILINE)
13+
HUNK_INFO = re.compile(r"@@\s\-\d+,\d+\s\+(\d+,\d+)\s@@", re.MULTILINE)
14+
15+
16+
def _get_filename_from_diff(front_matter: str) -> Optional[re.Match]:
17+
"""Get the filename from content in the given diff front matter."""
18+
filename_match = DIFF_FILE_NAME.search(front_matter)
19+
if filename_match is not None:
20+
return filename_match
21+
22+
# check for renamed file name
23+
rename_match = DIFF_RENAMED_FILE.search(front_matter)
24+
if rename_match is not None and front_matter.lstrip().startswith("similarity"):
25+
return rename_match
26+
# We may need to compensate for other instances where the filename is
27+
# not directly after `+++ b/`. Binary files are another example of this.
28+
if DIFF_BINARY_FILE.search(front_matter) is None:
29+
# log the case and hope it helps in the future
30+
logger.warning( # pragma: no cover
31+
"Unrecognized diff starting with:\n%s",
32+
"\n".join(front_matter.splitlines()),
33+
)
34+
return None
35+
36+
def parse_diff(full_diff: str) -> List[FileObj]:
37+
"""Parse a given diff into file objects.
38+
39+
:param full_diff: The complete diff for an event.
40+
:returns: A `list` of `FileObj` instances containing information about the files
41+
changed.
42+
"""
43+
file_objects: List[FileObj] = []
44+
logger.error("Using pure python to parse diff because pygit2 failed!")
45+
file_diffs = DIFF_FILE_DELIMITER.split(full_diff.lstrip("\n"))
46+
for diff in file_diffs:
47+
if not diff or diff.lstrip().startswith("deleted file"):
48+
continue
49+
first_hunk = HUNK_INFO.search(diff)
50+
hunk_start = -1 if first_hunk is None else first_hunk.start()
51+
diff_front_matter = diff[:hunk_start]
52+
filename_match = _get_filename_from_diff(diff_front_matter)
53+
if filename_match is None:
54+
continue
55+
filename = cast(str, filename_match.groups(0)[0])
56+
if first_hunk is None:
57+
continue
58+
diff_chunks, additions = _parse_patch(diff[first_hunk.start() :])
59+
file_objects.append(FileObj(filename, additions, diff_chunks))
60+
return file_objects
61+
62+
63+
def _parse_patch(full_patch: str) -> Tuple[List[List[int]], List[int]]:
64+
"""Parse a diff's patch accordingly.
65+
66+
:param full_patch: The entire patch of hunks for 1 file.
67+
:returns:
68+
A `tuple` of lists where
69+
70+
- Index 0 is the ranges of lines in the diff. Each item in this `list` is a
71+
2 element `list` describing the starting and ending line numbers.
72+
- Index 1 is a `list` of the line numbers that contain additions.
73+
"""
74+
ranges: List[List[int]] = []
75+
# additions is a list line numbers in the diff containing additions
76+
additions: List[int] = []
77+
line_numb_in_diff: int = 0
78+
chunks = HUNK_INFO.split(full_patch)
79+
for index, chunk in enumerate(chunks):
80+
if index % 2 == 1:
81+
# each odd element holds the starting line number and number of lines
82+
start_line, hunk_length = [int(x) for x in chunk.split(",")]
83+
ranges.append([start_line, hunk_length + start_line])
84+
line_numb_in_diff = start_line
85+
continue
86+
# each even element holds the actual line changes
87+
for i, line in enumerate(chunk.splitlines()):
88+
if line.startswith("+"):
89+
additions.append(line_numb_in_diff)
90+
if not line.startswith("-") and i: # don't increment on first line
91+
line_numb_in_diff += 1
92+
return (ranges, additions)

tests/test_git_str.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import logging
2+
import pytest
3+
from cpp_linter import logger
4+
from cpp_linter.git import parse_diff
5+
from cpp_linter.git_str import parse_diff as parse_diff_str
6+
7+
8+
def test_pygit2_bug1260(caplog: pytest.LogCaptureFixture):
9+
"""This test the legacy approach of parsing a diff str using pure python regex
10+
patterns.
11+
12+
See https://github.com/libgit2/pygit2/issues/1260 for details
13+
"""
14+
diff_str = "\n".join(
15+
[
16+
"diff --git a/path/for/Some file.cpp b/path/to/Some file.cpp",
17+
"similarity index 99%",
18+
"rename from path/for/Some file.cpp",
19+
"rename to path/to/Some file.cpp",
20+
]
21+
)
22+
caplog.set_level(logging.WARNING, logger=logger.name)
23+
# the bug in libgit2 should trigger a call to
24+
# cpp_linter.git_str.legacy_parse_diff()
25+
files = parse_diff(diff_str)
26+
assert caplog.messages, "this test is no longer needed; bug was fixed in pygit2"
27+
# if we get here test, then is satisfied
28+
assert not files # no line changes means no file to focus on
29+
30+
def test_typical_diff():
31+
"""For coverage completeness. Also tests for files with spaces in the names."""
32+
diff_str = "\n".join(
33+
[
34+
"diff --git a/path/for/Some file.cpp b/path/to/Some file.cpp",
35+
"--- a/path/for/Some file.cpp",
36+
"+++ b/path/to/Some file.cpp",
37+
"@@ -3,7 +3,7 @@",
38+
" ",
39+
" ",
40+
" ",
41+
"-#include <some_lib/render/animation.hpp>",
42+
"+#include <some_lib/render/animations.hpp>",
43+
" ",
44+
" ",
45+
" \n",
46+
]
47+
)
48+
from_c = parse_diff(diff_str)
49+
from_py = parse_diff_str(diff_str)
50+
assert [f.serialize() for f in from_c] == [f.serialize() for f in from_py]
51+
for file_obj in from_c:
52+
# file name should have spaces
53+
assert " " in file_obj.name
54+
55+
56+
def test_binary_diff():
57+
"""For coverage completeness"""
58+
diff_str = "\n".join(
59+
[
60+
"diff --git a/some picture.png b/some picture.png",
61+
"new file mode 100644",
62+
"Binary files /dev/null and b/some picture.png differ",
63+
]
64+
)
65+
files = parse_diff_str(diff_str)
66+
# binary files are ignored during parsing
67+
assert not files

tests/test_misc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_list_src_files(
8181
extensions: List[str],
8282
):
8383
"""List the source files in the demo folder of this repo."""
84-
monkeypatch.setattr(Globals, "FILES", [])
84+
monkeypatch.setattr(Globals, "FILES", [])
8585
monkeypatch.chdir(Path(__file__).parent.as_posix())
8686
caplog.set_level(logging.DEBUG, logger=cpp_linter.logger.name)
8787
list_source_files(ext_list=extensions, ignored_paths=[], not_ignored=[])

0 commit comments

Comments
 (0)