Skip to content

Improve filter_by_diff.py and friends #1350

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions scripts/diff_to_added_lines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env python

from __future__ import print_function

def diff_to_added_lines(diff_file, repository_root, out_stream):

import unidiff
import os.path
import json

# Create a set of all the files and the specific lines within that file that are in the diff
added_lines = dict()

for file_in_diff in unidiff.PatchSet.from_filename(diff_file):
filename = file_in_diff.target_file
# Skip files deleted in the tip (b side of the diff):
if filename == "/dev/null":
continue
assert filename.startswith("b/")
filename = os.path.join(repository_root, filename[2:])
if filename not in added_lines:
added_lines[filename] = []
added_lines[filename].append(0)
for diff_hunk in file_in_diff:
for diff_line in diff_hunk:
if diff_line.line_type == "+":
if filename not in added_lines:
added_lines[filename] = []
added_lines[filename].append(diff_line.target_line_no)

json.dump(added_lines, out_stream)

if __name__ == "__main__":

import sys

if len(sys.argv) != 3:
print("diff_to_added_lines.py: converts a unified-diff file into a JSON dictionary mapping filenames onto an array of added or modified line numbers", file=sys.stderr)
print("Usage: diff_to_added_lines.py diff.patch repository_root_directory", file=sys.stderr)

sys.exit(1)

diff_to_added_lines(sys.argv[1], sys.argv[2], sys.stdout)

diff_file = sys.argv[1]
repository_root = sys.argv[2]
53 changes: 0 additions & 53 deletions scripts/filter_by_diff.py

This file was deleted.

56 changes: 56 additions & 0 deletions scripts/filter_by_lines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python

from __future__ import print_function

def filter_by_lines(diffed_file, added_lines_file, repository_root, in_stream, out_stream):

import os.path
import json

diffed_file = sys.argv[1]
added_lines_file = sys.argv[2]
repository_root = sys.argv[3]

# Get a set of all the files and the specific lines within that file to keep:
with open(added_lines_file, "r") as f:
added_lines = json.load(f)

# added_lines is a dict filename -> line_number list
# Make those into line_number sets instead:

for k in added_lines:
added_lines[k] = set(added_lines[k])

# Print the lines that are in the set
found = False
for line in in_stream:
line_parts = line.split(":")
if len(line_parts) < 3:
if found:
# Print lines between a matching warning and the next warning
out_stream.write(line)
continue
try:
linenum = int(line_parts[1])
found = False
filename = line_parts[0]
if not repository_root in filename:
filename = os.path.join(repository_root, line_parts[0])
if filename in added_lines and linenum in added_lines[filename]:
found = True
out_stream.write(line)
except ValueError:
if found:
out_stream.write(line)
continue

if __name__ == "__main__":

import sys

if len(sys.argv) != 4:
print("filter_by_lines.py: filters lines of the form filename:line_number:message, retaining those matching a particular filename and list of line numbers", file=sys.stderr)
print("Usage: filter_by_lines.py diffed_file added_lines.json repository_root_directory < warnings.txt", file=sys.stderr)
sys.exit(1)

filter_by_lines(sys.argv[1], sys.argv[2], sys.argv[3], sys.stdin, sys.stdout)
37 changes: 24 additions & 13 deletions scripts/run_diff.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

set -e

script_folder=`dirname $0`
absolute_repository_root=`git rev-parse --show-toplevel`
script_folder=$(dirname "$0")
absolute_repository_root=$(git rev-parse --show-toplevel)
mode=$1
modes="CPPLINT | DOXYGEN"

Expand All @@ -16,22 +16,28 @@ then
exit 1
fi

if ! [[ -e $script_folder/filter_by_diff.py ]]
if ! [[ -e ${script_folder}/filter_by_lines.py ]]
then
echo "Filter script could not be found in the $script_folder directory"
echo "Ensure filter_by_diff.py is inside the $script_folder directory then run again"
echo "Ensure filter_by_lines.py is inside the $script_folder directory then run again"
exit 1
fi

if [[ "$mode" == "CPPLINT" ]]
then
if ! [[ -e $script_folder/cpplint.py ]]
if ! which iconv >/dev/null
then
echo "Couldn't find iconv in current path. Please install and try again"
exit 1
fi

if ! [[ -e "${script_folder}/cpplint.py" ]]
then
echo "Lint script could not be found in the $script_folder directory"
echo "Ensure cpplint.py is inside the $script_folder directory then run again"
exit 1
else
cmd='$script_folder/cpplint.py $file 2>&1 >/dev/null'
cmd='${script_folder}/cpplint.py $file 2>&1 >/dev/null'
fi
elif [[ "$mode" == "DOXYGEN" ]]
then
Expand Down Expand Up @@ -72,21 +78,26 @@ else
git_merge_base_end="HEAD"
fi

git_start=`git merge-base $git_start $git_merge_base_end`
git_start=$(git merge-base $git_start $git_merge_base_end)

cleanup()
{
rm -f $diff_file
rm -f "$diff_file" "$added_lines_file"
}

trap cleanup EXIT

diff_file=`mktemp`
diff_file=$(mktemp)
added_lines_file=$(mktemp)

# Pass the output through iconv to remove any invalid UTF-8 (diff_to_added_lines.py will die otherwise)

git diff $git_start $git_end | iconv -t utf-8 -c > "$diff_file"

git diff $git_start $git_end > $diff_file
# Get the list of files that have changed, that end with lintable extensions
diff_files=$(git diff --name-only $git_start $git_end | grep "\.\(\(cpp\)\|\(hh\)\|\(cc\)\|h\)$" || true)

# Get the list of files that have changed
diff_files=`git diff --name-only $git_start $git_end`
"${script_folder}/diff_to_added_lines.py" "$diff_file" "$absolute_repository_root" > "$added_lines_file"

for file in $diff_files; do
file=$absolute_repository_root/$file
Expand All @@ -98,7 +109,7 @@ for file in $diff_files; do

# Run the linting script and filter:
# The errors from the linter go to STDERR so must be redirected to STDOUT
result=`eval $cmd | $script_folder/filter_by_diff.py $file $diff_file $absolute_repository_root`
result=$(eval $cmd | "${script_folder}/filter_by_lines.py" "$file" "$added_lines_file" "$absolute_repository_root")

# Providing some errors were relevant we print them out
if [ "$result" ]
Expand Down