-
Notifications
You must be signed in to change notification settings - Fork 262
NF nib-diff #617
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NF nib-diff #617
Changes from all commits
328d3bb
d293a20
5491af4
949762c
93b5e09
22804f1
f81a78b
fe9c052
5eb4477
e2defb0
5e3a767
7febf65
23a43ba
fae491d
3e87d81
a3b35d9
1491c61
397bc03
f192f65
92553a2
7a70d56
f5e930d
df82a51
6d706f5
774ce3b
911d781
0458694
fed70e9
0b59dfb
2920abf
1e57409
fd6c474
497ad2a
df0aa79
c23143c
db16d85
feca439
acf667b
bb3fbf0
8a92010
a9a572a
3290a66
92e4ed0
06e8dd7
8fd6995
df8bc04
45d3fbf
1cbf5b3
45bdf64
c600746
e26adb5
3802919
0ce86df
72bc800
0cf2a8c
5db2654
50a480e
9e155df
3c0c90c
f8c32b8
51733b0
41caade
f1cee5f
676ac70
f476c48
10c2c42
7989563
ae74339
82b1457
45d0edf
c1f553f
2f89242
6613522
a311d7b
2cd69b5
414da00
59006b0
672661e
baf6cdc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!python | ||
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- | ||
# vi: set ft=python sts=4 ts=4 sw=4 et: | ||
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## | ||
# | ||
# See COPYING file distributed along with the NiBabel package for the | ||
# copyright and license terms. | ||
# | ||
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## | ||
""" | ||
Quick diff summary for a set of neuroimaging files | ||
""" | ||
|
||
from nibabel.cmdline.diff import main | ||
|
||
# Run the nib-diff command line (nibabel.cmdline.diff.main) only when this
# file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ | |
-r requirements.txt | ||
nose | ||
mock | ||
hypothesis |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,207 @@ | ||
#!python | ||
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- | ||
# vi: set ft=python sts=4 ts=4 sw=4 et: | ||
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## | ||
# | ||
# See COPYING file distributed along with the NiBabel package for the | ||
# copyright and license terms. | ||
# | ||
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## | ||
""" | ||
Quick summary of the differences among a set of neuroimaging files | ||
""" | ||
from __future__ import division, print_function, absolute_import | ||
|
||
import re | ||
import sys | ||
from collections import OrderedDict | ||
from optparse import OptionParser, Option | ||
|
||
import numpy as np | ||
|
||
import nibabel as nib | ||
import nibabel.cmdline.utils | ||
import hashlib | ||
import os | ||
|
||
|
||
def get_opt_parser():
    """Build the command-line option parser for this tool

    Returns
    -------
    OptionParser
      parser accepting ``-v/--verbose`` (a counter, default 0) and
      ``-H/--header-fields`` (a string, default ``'all'``)
    """
    # the module docstring doubles as the description shown in the help output
    usage_text = "%s [OPTIONS] [FILE ...]\n\n" % sys.argv[0] + __doc__
    parser = OptionParser(usage=usage_text,
                          version="%prog " + nib.__version__)

    parser.add_option(
        "-v", "--verbose", action="count",
        dest="verbose", default=0,
        help="Make more noise. Could be specified multiple times")

    parser.add_option(
        "-H", "--header-fields",
        dest="header_fields", default='all',
        help="Header fields (comma separated) to be printed as well (if present)")

    return parser
|
||
|
||
def are_values_different(*values):
    """Generically compare values, return True if any differs from the first

    Every value after the first is compared against the first one.  The
    comparison is meant for reporting header differences, so:

    - values of different types are considered different (e.g. ``1 != 1.0``)
    - NaNs found in the same positions are considered to be the "same",
      although generally ``nan != nan``
    - arrays of different shapes are considered different

    Parameters
    ----------
    *values: values to compare (scalars, strings, numpy arrays, ...)

    Returns
    -------
    bool
      True if at least one value differs from the first one; False
      otherwise, including when fewer than two values are given
    """
    if len(values) < 2:
        # nothing to compare against
        return False

    reference = values[0]
    # every remaining value must be checked: stop only on the first difference
    return any(_pair_differs(reference, other) for other in values[1:])


def _nan_mask(value):
    """Return a boolean array marking NaNs in value, or None if not numeric"""
    try:
        return np.asarray(np.isnan(value))
    except TypeError:
        # np.isnan is not defined for this kind of value (str, bytes, None, ...)
        return None


def _pair_differs(first, second):
    """Return True if the two values differ, treating aligned NaNs as equal"""
    first_nans = _nan_mask(first)
    second_nans = _nan_mask(second)
    first_has_nan = first_nans is not None and bool(first_nans.any())
    second_has_nan = second_nans is not None and bool(second_nans.any())

    if first_has_nan != second_has_nan:
        # only one of the two contains NaN
        return True

    if first_has_nan:
        # both contain NaN: they must sit in the same positions ...
        if first_nans.shape != second_nans.shape:
            return True
        if bool((first_nans != second_nans).any()):
            return True
        if first_nans.ndim == 0:
            # two scalar NaNs are the "same"
            return False
        # ... and all the remaining (non-NaN) elements must agree
        keep = np.logical_not(first_nans)
        return bool(np.any(np.asarray(first)[keep] != np.asarray(second)[keep]))

    if type(first) is not type(second):
        # if types are different, then we consider the values different
        return True

    if isinstance(first, np.ndarray):
        if first.shape != second.shape:
            # do not let broadcasting hide a difference in shape
            return True
        return bool(np.any(first != second))

    return bool(first != second)
|
||
|
||
def get_headers_diff(file_headers, names=None):
    """Collect the header fields whose values are not the same in all files

    Parameters
    ----------
    file_headers: list of actual headers (dict-like) from files
    names: list of header fields to test; when None, the fields of the
      first header are tested

    Returns
    -------
    dict
      str: list for each header field which differs, the list of its
      values, one per file (None where ``header.get`` finds no such field)
    """
    if names is None:
        names = file_headers[0].keys()

    # pair every requested field with its value in each of the headers
    per_field = (
        (name, [hdr.get(name) for hdr in file_headers])
        for name in names
    )

    # keep only the fields for which the values disagree, in the given order
    return OrderedDict(
        (name, found)
        for name, found in per_field
        if are_values_different(*found)
    )
|
||
|
||
def get_data_diff(files):
    """Get difference between md5 values of the data of the files

    Parameters
    ----------
    files: list of files to load with ``nib.load``

    Returns
    -------
    list
      str: md5 hex digest of the data of each file (data is converted to
      a contiguous float32 array before hashing); an empty list if all the
      digests are identical
    """
    # MD5 is sufficient here and gives a shorter value to display than e.g.
    # SHA256: collision resistance is not a concern for this use case
    digests = []
    for fname in files:
        data = nib.load(fname).get_data()
        buf = np.ascontiguousarray(data, dtype=np.float32)
        digests.append(hashlib.md5(buf).hexdigest())

    # a single unique digest means that all the files carry the same data
    return [] if len(set(digests)) == 1 else digests
|
||
|
||
def display_diff(files, diff):
    """Render the differences as a plain-text table

    Parameters
    ----------
    files: list of files that were compared; their base names head the
      value columns
    diff: dict mapping each differing field to the list of its values,
      one per file

    Returns
    -------
    str
      string-formatted table of differences: a title line, a header row
      and one row per field, each terminated by a newline
    """
    field_fmt = "{:<15}"
    value_fmt = "{:<55}"

    def _visible(item):
        # Value might start/end with some invisible spacing characters, so
        # mark those on both ends with '<' and '>' ...
        text = re.sub('^[ \t]+', '<', str(item))
        text = re.sub('[ \t]+$', '>', text)
        # ... and show NUL characters as a question mark
        return re.sub('[\x00]', '?', text)

    header_row = field_fmt.format('Field') + "".join(
        value_fmt.format(os.path.basename(fname)) for fname in files)

    rows = ["These files are different.", header_row]
    for field, found in diff.items():
        cells = "".join(value_fmt.format(_visible(item)) for item in found)
        rows.append(field_fmt.format(field) + cells)

    return "\n".join(rows) + "\n"
|
||
|
||
def main(args=None, out=None):
    """Compare the headers and data of the given files and report the result

    Parameters
    ----------
    args: list of command-line arguments handed to the option parser
      (None is passed through to ``parse_args`` as is)
    out: object with a ``write`` method to receive the report;
      ``sys.stdout`` is used when not given

    Raises
    ------
    AssertionError
      if fewer than two files are given
    SystemExit
      always on completion: with code 1 if the files differ, with code 0
      if they are identical
    """
    if not out:
        out = sys.stdout

    opts, files = get_opt_parser().parse_args(args)

    nibabel.cmdline.utils.verbose_level = opts.verbose
    if opts.verbose < 3:
        # suppress nibabel format-compliance warnings
        nib.imageglobals.logger.level = 50

    assert len(files) >= 2, "Please enter at least two files"

    file_headers = [nib.load(fname).header for fname in files]

    # 'all' signals "all fields"
    # TODO: header fields might vary across file types, thus prior sensing would be needed
    header_fields = (file_headers[0].keys()
                     if opts.header_fields == 'all'
                     else opts.header_fields.split(','))

    diff = get_headers_diff(file_headers, header_fields)

    data_diff = get_data_diff(files)
    if data_diff:
        diff['DATA(md5)'] = data_diff

    # Raising SystemExit has the same effect as calling sys.exit(), and it is
    # easy to verify in the tests that the exception was raised
    if not diff:
        out.write("These files are identical.\n")
        raise SystemExit(0)

    out.write(display_diff(files, diff))
    raise SystemExit(1)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wouldn't hold this PR up for this, but just FYI:
`optparse` has been deprecated, and `argparse` is the supported argument parser.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah... we should convert all the cmdline tools which still use optparse in a single PR ;)