From dce6ae141ad11ce4a5c5bbd792f0a1a90adb65c0 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 5 Sep 2017 13:47:28 +0100 Subject: [PATCH 1/2] Improve filter_by_diff.py and friends * Attempts to cope with invalid UTF-8 (present in a few CBMC files) using iconv * Splits the filter-by-diff job into two stages (diff -> JSON description of interesting lines and JSON -> filtered output) This means that run_diff.sh CPPLINT is now practical, and produces a report of the linting problems in develop but not in master (around 150 lines as of the time of writing) in about 2 minutes. --- scripts/diff_to_added_lines.py | 46 ++++++++++++++++++++++++++++ scripts/filter_by_diff.py | 53 -------------------------------- scripts/filter_by_lines.py | 56 ++++++++++++++++++++++++++++++++++ scripts/run_diff.sh | 25 ++++++++++----- 4 files changed, 120 insertions(+), 60 deletions(-) create mode 100755 scripts/diff_to_added_lines.py delete mode 100755 scripts/filter_by_diff.py create mode 100755 scripts/filter_by_lines.py diff --git a/scripts/diff_to_added_lines.py b/scripts/diff_to_added_lines.py new file mode 100755 index 00000000000..09c7f89f547 --- /dev/null +++ b/scripts/diff_to_added_lines.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +from __future__ import print_function + +def diff_to_added_lines(diff_file, repository_root, out_stream): + + import unidiff + import os.path + import json + + # Create a set of all the files and the specific lines within that file that are in the diff + added_lines = dict() + + for file_in_diff in unidiff.PatchSet.from_filename(diff_file): + filename = file_in_diff.target_file + # Skip files deleted in the tip (b side of the diff): + if filename == "/dev/null": + continue + assert filename.startswith("b/") + filename = os.path.join(repository_root, filename[2:]) + if filename not in added_lines: + added_lines[filename] = [] + added_lines[filename].append(0) + for diff_hunk in file_in_diff: + for diff_line in diff_hunk: + if diff_line.line_type == "+": + if filename not in added_lines: + added_lines[filename] = [] + added_lines[filename].append(diff_line.target_line_no) + + json.dump(added_lines, out_stream) + +if __name__ == "__main__": + + import sys + + if len(sys.argv) != 3: + print("diff_to_added_lines.py: converts a unified-diff file into a JSON dictionary mapping filenames onto an array of added or modified line numbers", file=sys.stderr) + print("Usage: diff_to_added_lines.py diff.patch repository_root_directory", file=sys.stderr) + + sys.exit(1) + + diff_to_added_lines(sys.argv[1], sys.argv[2], sys.stdout) + +diff_file = sys.argv[1] +repository_root = sys.argv[2] diff --git a/scripts/filter_by_diff.py b/scripts/filter_by_diff.py deleted file mode 100755 index 3e7689ddf5b..00000000000 --- a/scripts/filter_by_diff.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python - -import sys -import unidiff -import os.path - -if len(sys.argv) != 4: - print >>sys.stderr, "Usage: filter_by_diff.py diffed_file diff.patch repository_root_directory < warnings.txt" - sys.exit(1) - -diffed_file = sys.argv[1] -diff_file = sys.argv[2] -repository_root = sys.argv[3] - -# Create a set of all the files and the specific lines within that file that are in the diff -added_lines = set() -for diff_file in unidiff.PatchSet.from_filename(diff_file): - filename = diff_file.target_file - # Skip files deleted in the tip (b side of the diff): - if filename == "/dev/null": - continue - assert filename.startswith("b/") - filename = os.path.join(repository_root, filename[2:]) - if filename != diffed_file: - continue - added_lines.add((filename, 0)) - for diff_hunk in diff_file: - for diff_line in diff_hunk: - if diff_line.line_type == "+": - added_lines.add((filename, diff_line.target_line_no)) - -# Print the lines that are in the set -found = False -for line in sys.stdin: - line_parts = line.split(":") - if len(line_parts) < 3: - if found: - # Print lines between a matching warning and the next warning - sys.stdout.write(line) - continue - try: - linenum = int(line_parts[1]) - found = False - filename = line_parts[0] - if not repository_root in filename: - filename = os.path.join(repository_root, line_parts[0]) - if (filename, linenum) in added_lines: - found = True - sys.stdout.write(line) - except ValueError: - if found: - sys.stdout.write(line) - continue diff --git a/scripts/filter_by_lines.py b/scripts/filter_by_lines.py new file mode 100755 index 00000000000..37c0cb41d83 --- /dev/null +++ b/scripts/filter_by_lines.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +from __future__ import print_function + +def filter_by_lines(diffed_file, added_lines_file, repository_root, in_stream, out_stream): + + import os.path + import json + + diffed_file = sys.argv[1] + added_lines_file = sys.argv[2] + repository_root = sys.argv[3] + + # Get a set of all the files and the specific lines within that file to keep: + with open(added_lines_file, "r") as f: + added_lines = json.load(f) + + # added_lines is a dict filename -> line_number list + # Make those into line_number sets instead: + + for k in added_lines: + added_lines[k] = set(added_lines[k]) + + # Print the lines that are in the set + found = False + for line in in_stream: + line_parts = line.split(":") + if len(line_parts) < 3: + if found: + # Print lines between a matching warning and the next warning + out_stream.write(line) + continue + try: + linenum = int(line_parts[1]) + found = False + filename = line_parts[0] + if not repository_root in filename: + filename = os.path.join(repository_root, line_parts[0]) + if filename in added_lines and linenum in added_lines[filename]: + found = True + out_stream.write(line) + except ValueError: + if found: + out_stream.write(line) + continue + +if __name__ == "__main__": + + import sys + + if len(sys.argv) != 4: + print("filter_by_lines.py: filters lines of the form filename:line_number:message, retaining those matching a particular filename and list of line numbers", file=sys.stderr) + print("Usage: filter_by_lines.py diffed_file added_lines.json repository_root_directory < warnings.txt", file=sys.stderr) + sys.exit(1) + + filter_by_lines(sys.argv[1], sys.argv[2], sys.argv[3], sys.stdin, sys.stdout) diff --git a/scripts/run_diff.sh b/scripts/run_diff.sh index 0e388537fb4..fee1f7daf24 100755 --- a/scripts/run_diff.sh +++ b/scripts/run_diff.sh @@ -16,15 +16,21 @@ then exit 1 fi -if ! [[ -e $script_folder/filter_by_diff.py ]] +if ! [[ -e $script_folder/filter_by_lines.py ]] then echo "Filter script could not be found in the $script_folder directory" - echo "Ensure filter_by_diff.py is inside the $script_folder directory then run again" + echo "Ensure filter_by_lines.py is inside the $script_folder directory then run again" exit 1 fi if [[ "$mode" == "CPPLINT" ]] then + if ! which iconv >/dev/null + then + echo "Couldn't find iconv in current path. Please install and try again" + exit 1 + fi + if ! [[ -e $script_folder/cpplint.py ]] then echo "Lint script could not be found in the $script_folder directory" @@ -76,17 +82,22 @@ git_start=`git merge-base $git_start $git_merge_base_end` cleanup() { - rm -f $diff_file + rm -f $diff_file $added_lines_file } trap cleanup EXIT diff_file=`mktemp` +added_lines_file=`mktemp` + +# Pass the output through iconv to remove any invalid UTF-8 (diff_to_added_lines.py will die otherwise) + +git diff $git_start $git_end | iconv -t utf-8 -c > $diff_file -git diff $git_start $git_end > $diff_file +# Get the list of files that have changed, that end with lintable extensions +diff_files=`git diff --name-only $git_start $git_end | grep "\.\(\(cpp\)\|\(hh\)\|\(cc\)\|h\)$" || true` -# Get the list of files that have changed -diff_files=`git diff --name-only $git_start $git_end` +$script_folder/diff_to_added_lines.py $diff_file $absolute_repository_root > $added_lines_file for file in $diff_files; do file=$absolute_repository_root/$file @@ -98,7 +109,7 @@ for file in $diff_files; do # Run the linting script and filter: # The errors from the linter go to STDERR so must be redirected to STDOUT - result=`eval $cmd | $script_folder/filter_by_diff.py $file $diff_file $absolute_repository_root` + result=`eval $cmd | $script_folder/filter_by_diff.py $file $added_lines_file $absolute_repository_root` # Providing some errors were relevant we print them out if [ "$result" ] From 389221ebee1e6a3dced6ca6aa8e7ef99155707f6 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 6 Sep 2017 11:06:28 +0100 Subject: [PATCH 2/2] run_diff.sh shellcheck fixes * Replace backticks with $() * Double-quote filenames that might conceivably contain troublesome special chars (*, space etc) --- scripts/run_diff.sh | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts/run_diff.sh b/scripts/run_diff.sh index fee1f7daf24..aed0d24ddb0 100755 --- a/scripts/run_diff.sh +++ b/scripts/run_diff.sh @@ -2,8 +2,8 @@ set -e -script_folder=`dirname $0` -absolute_repository_root=`git rev-parse --show-toplevel` +script_folder=$(dirname "$0") +absolute_repository_root=$(git rev-parse --show-toplevel) mode=$1 modes="CPPLINT | DOXYGEN" @@ -16,7 +16,7 @@ then exit 1 fi -if ! [[ -e $script_folder/filter_by_lines.py ]] +if ! [[ -e ${script_folder}/filter_by_lines.py ]] then echo "Filter script could not be found in the $script_folder directory" echo "Ensure filter_by_lines.py is inside the $script_folder directory then run again" @@ -31,13 +31,13 @@ then exit 1 fi - if ! [[ -e $script_folder/cpplint.py ]] + if ! [[ -e "${script_folder}/cpplint.py" ]] then echo "Lint script could not be found in the $script_folder directory" echo "Ensure cpplint.py is inside the $script_folder directory then run again" exit 1 else - cmd='$script_folder/cpplint.py $file 2>&1 >/dev/null' + cmd='${script_folder}/cpplint.py $file 2>&1 >/dev/null' fi elif [[ "$mode" == "DOXYGEN" ]] then @@ -78,26 +78,26 @@ else git_merge_base_end="HEAD" fi -git_start=`git merge-base $git_start $git_merge_base_end` +git_start=$(git merge-base $git_start $git_merge_base_end) cleanup() { - rm -f $diff_file $added_lines_file + rm -f "$diff_file" "$added_lines_file" } trap cleanup EXIT -diff_file=`mktemp` -added_lines_file=`mktemp` +diff_file=$(mktemp) +added_lines_file=$(mktemp) # Pass the output through iconv to remove any invalid UTF-8 (diff_to_added_lines.py will die otherwise) -git diff $git_start $git_end | iconv -t utf-8 -c > $diff_file +git diff $git_start $git_end | iconv -t utf-8 -c > "$diff_file" # Get the list of files that have changed, that end with lintable extensions -diff_files=`git diff --name-only $git_start $git_end | grep "\.\(\(cpp\)\|\(hh\)\|\(cc\)\|h\)$" || true` +diff_files=$(git diff --name-only $git_start $git_end | grep "\.\(\(cpp\)\|\(hh\)\|\(cc\)\|h\)$" || true) -$script_folder/diff_to_added_lines.py $diff_file $absolute_repository_root > $added_lines_file +"${script_folder}/diff_to_added_lines.py" "$diff_file" "$absolute_repository_root" > "$added_lines_file" for file in $diff_files; do file=$absolute_repository_root/$file @@ -109,7 +109,7 @@ for file in $diff_files; do # Run the linting script and filter: # The errors from the linter go to STDERR so must be redirected to STDOUT - result=`eval $cmd | $script_folder/filter_by_diff.py $file $added_lines_file $absolute_repository_root` + result=$(eval $cmd | "${script_folder}/filter_by_lines.py" "$file" "$added_lines_file" "$absolute_repository_root") # Providing some errors were relevant we print them out if [ "$result" ]