diff --git a/.travis.yml b/.travis.yml index 28ac4fa5f4..f2d22fb02a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ cache: - $HOME/.cache/pip env: global: - - DEPENDS="six numpy scipy matplotlib h5py pillow pydicom" + - DEPENDS="six numpy scipy matplotlib h5py pillow pydicom hypothesis" - OPTIONAL_DEPENDS="" - INSTALL_TYPE="setup" - EXTRA_WHEELS="https://5cf40426d9f06eb7461d-6fe47d9331aba7cd62fc36c7196769e4.ssl.cf2.rackcdn.com" @@ -95,7 +95,7 @@ before_install: - source venv/bin/activate - python --version # just to check - pip install -U pip wheel # needed at one point - - retry pip install nose flake8 mock # always + - retry pip install nose flake8 mock hypothesis # always - pip install $EXTRA_PIP_FLAGS $DEPENDS $OPTIONAL_DEPENDS - if [ "${COVERAGE}" == "1" ]; then pip install coverage; diff --git a/appveyor.yml b/appveyor.yml index e41aee90c8..3bb9c3d074 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -20,8 +20,7 @@ install: - SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% # Install the dependencies of the project. - - pip install numpy scipy matplotlib nose h5py mock - - pip install pydicom + - pip install numpy scipy matplotlib nose h5py mock hypothesis pydicom - pip install . - SET NIBABEL_DATA_DIR=%CD%\nibabel-data diff --git a/bin/nib-diff b/bin/nib-diff new file mode 100755 index 0000000000..2ae66dda9d --- /dev/null +++ b/bin/nib-diff @@ -0,0 +1,17 @@ +#!python +# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the NiBabel package for the +# copyright and license terms. 
def get_opt_parser():
    """Build the command-line option parser for ``nib-diff``.

    Returns
    -------
    optparse.OptionParser
        Parser exposing ``-v/--verbose`` (countable) and
        ``-H/--header-fields`` (comma separated names, default ``'all'``).
    """
    # use module docstring for help output
    parser = OptionParser(
        usage="%s [OPTIONS] [FILE ...]\n\n" % sys.argv[0] + __doc__,
        version="%prog " + nib.__version__)

    parser.add_options([
        Option("-v", "--verbose", action="count",
               dest="verbose", default=0,
               help="Make more noise. Could be specified multiple times"),

        Option("-H", "--header-fields",
               dest="header_fields", default='all',
               help="Header fields (comma separated) to be printed as well (if present)"),
    ])

    return parser


def _values_differ(first, other):
    """Tell whether two values differ; NaNs at matching positions count as equal."""
    # An exact type match is intended here (1 and 1.0 are reported as
    # different), so isinstance() would be too permissive.
    if type(first) != type(other):
        return True

    try:
        first_nan = np.asarray(np.isnan(first))
        other_nan = np.asarray(np.isnan(other))
    except (TypeError, ValueError):
        # Non-numeric values (strings, None, object arrays, ragged lists)
        # cannot be tested for NaN; fall back to plain comparison below.
        first_nan = other_nan = None

    if first_nan is not None and (first_nan.any() or other_nan.any()):
        first_arr = np.asarray(first)
        other_arr = np.asarray(other)
        # NaNs must sit at the same positions in both values ...
        if first_arr.shape != other_arr.shape or np.any(first_nan != other_nan):
            return True
        # ... and every non-NaN element must match (nan != nan is masked out)
        return bool(np.any((first_arr != other_arr) & ~first_nan))

    if isinstance(first, np.ndarray):
        if first.shape != other.shape:
            return True
        return bool(np.any(first != other))

    return bool(first != other)


def are_values_different(*values):
    """Generically compare values, return True if any differs from the first.

    Every value after the first is compared against the first one.  Values of
    different types are always considered different (``1`` vs ``1.0``).  NaNs
    are considered equal to each other, although generally ``nan != nan``;
    for arrays and sequences the NaNs have to be at the same positions and
    all remaining elements have to match.

    Parameters
    ----------
    *values
        Values to compare (scalars, strings, lists, ``numpy`` arrays, ...).

    Returns
    -------
    bool
        True if at least one value differs from the first one.  False if all
        are the same, or if fewer than two values were given.
    """
    if len(values) < 2:
        return False

    reference = values[0]
    return any(_values_differ(reference, other) for other in values[1:])


def get_headers_diff(file_headers, names=None):
    """Get difference between headers

    Parameters
    ----------
    file_headers: list of actual headers (dicts) from files
    names: list of header fields to test.  If None, the fields of the first
        header are used.

    Returns
    -------
    dict
      str: list for each header field which differs, return list of
      values per each file
    """
    difference = OrderedDict()

    if not file_headers:
        # nothing to compare
        return difference

    fields = file_headers[0].keys() if names is None else names

    for field in fields:
        # value of this field in every file (None where the field is absent)
        values = [header.get(field) for header in file_headers]

        if are_values_different(*values):
            difference[field] = values

    return difference


def get_data_diff(files):
    """Get difference between md5 values of the image data

    The data of every file is cast to float32 before hashing, so files
    storing the same values with different on-disk types compare equal.

    Parameters
    ----------
    files: list of file names of the images to load and compare

    Returns
    -------
    list
      str: md5 hex digests of the respective files, or an empty list if all
      digests are identical
    """
    md5sums = [
        hashlib.md5(np.ascontiguousarray(nib.load(f).get_data(), dtype=np.float32)).hexdigest()
        for f in files
    ]

    if len(set(md5sums)) == 1:
        return []

    return md5sums


def display_diff(files, diff):
    """Format header differences into a nice string

    Parameters
    ----------
    files: list of files that were compared so we can print their names
    diff: dict of different valued header fields

    Returns
    -------
    str
      string-formatted table of differences: a title line, a heading row
      with the file base names and one row per differing field
    """
    field_width = "{:<15}"
    value_width = "{:<55}"

    def printable(item):
        item_str = str(item)
        # Value might start/end with some invisible spacing characters so we
        # would "condition" it on both ends a bit
        item_str = re.sub('^[ \t]+', '<', item_str)
        item_str = re.sub('[ \t]+$', '>', item_str)
        # and also replace some other invisible symbols with a question
        # mark
        return re.sub('[\x00]', '?', item_str)

    rows = ["These files are different."]
    rows.append(
        field_width.format('Field') +
        "".join(value_width.format(os.path.basename(f)) for f in files))

    for key, values in diff.items():
        rows.append(
            field_width.format(key) +
            "".join(value_width.format(printable(item)) for item in values))

    # every row, including the last one, is newline terminated
    return "\n".join(rows) + "\n"


def main(args=None, out=None):
    """Compare headers and data of the given files and report the result.

    Parameters
    ----------
    args: list of command-line arguments; None means ``sys.argv[1:]``
    out: file-like object the report is written to; defaults to ``sys.stdout``

    Raises
    ------
    SystemExit
        Always: code 0 if the files are identical, 1 if they differ, and 2
        (through the option parser) if fewer than two files were given.
    """
    out = out or sys.stdout
    parser = get_opt_parser()
    (opts, files) = parser.parse_args(args)

    nibabel.cmdline.utils.verbose_level = opts.verbose

    if nibabel.cmdline.utils.verbose_level < 3:
        # suppress nibabel format-compliance warnings
        nib.imageglobals.logger.level = 50

    if len(files) < 2:
        # Not an assert: asserts vanish under ``python -O`` and a usage
        # error should be reported as such rather than as a traceback.
        parser.error("Please enter at least two files")

    file_headers = [nib.load(f).header for f in files]

    # signals "all fields"
    if opts.header_fields == 'all':
        # TODO: header fields might vary across file types, thus prior sensing would be needed
        header_fields = file_headers[0].keys()
    else:
        header_fields = opts.header_fields.split(',')

    diff = get_headers_diff(file_headers, header_fields)
    data_diff = get_data_diff(files)

    if data_diff:
        diff['DATA(md5)'] = data_diff

    if diff:
        out.write(display_diff(files, diff))
        raise SystemExit(1)

    else:
        out.write("These files are identical.\n")
        raise SystemExit(0)
def test_display_diff():
    """display_diff lays fields and per-file values out in fixed-width columns."""
    bogus_names = ["hellokitty.nii.gz", "privettovarish.nii.gz"]

    dict_values = OrderedDict([
        ("datatype", [np.array(2).astype(dtype="uint8"), np.array(4).astype(dtype="uint8")]),
        ("bitpix", [np.array(8).astype(dtype="uint8"), np.array(16).astype(dtype="uint8")])
    ])

    # columns: a 15-character field name followed by one 55-character cell
    # per compared file
    row = "{:<15}{:<55}{:<55}\n"
    expected_output = "".join([
        "These files are different.\n",
        row.format("Field", *bogus_names),
        row.format("datatype", "2", "4"),
        row.format("bitpix", "8", "16"),
    ])

    assert_equal(display_diff(bogus_names, dict_values), expected_output)


def test_get_data_diff():
    """Identical files yield an empty data difference."""
    # testing for identical files specifically as md5 may vary by computer
    test_names = [pjoin(data_path, f)
                  for f in ('standard.nii.gz', 'standard.nii.gz')]
    assert_equal(get_data_diff(test_names), [])


def test_main():
    """main reports differences with exit code 1 and identity with exit code 0."""
    test_names = [pjoin(data_path, f)
                  for f in ('standard.nii.gz', 'example4d.nii.gz')]
    # main() always leaves through SystemExit, so its report has to be read
    # back from the stream it was given rather than from a return value
    out = StringIO()
    with assert_raises(SystemExit) as raised:
        main(test_names, out)
    assert_equal(raised.exception.code, 1)

    report = out.getvalue()
    assert report.startswith("These files are different.\n")
    # md5 digests themselves may vary by computer, so only the presence of
    # the rows is checked
    expected_fields = [
        "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim",
        "slice_end", "xyzt_units", "cal_max", "descrip", "qform_code",
        "sform_code", "quatern_b", "quatern_c", "quatern_d", "qoffset_x",
        "qoffset_y", "qoffset_z", "srow_x", "srow_y", "srow_z", "DATA(md5)",
    ]
    for field in expected_fields:
        assert field in report, "%s missing from the report" % field

    test_names_2 = [pjoin(data_path, f) for f in ('standard.nii.gz', 'standard.nii.gz')]

    out = StringIO()
    with assert_raises(SystemExit) as raised:
        main(test_names_2, out)
    assert_equal(raised.exception.code, 0)
    assert_equal(out.getvalue(), "These files are identical.\n")
@given(st.data())
def test_diff_values_int(data):
    """Distinct integers are different; an integer is the same as itself."""
    x = data.draw(st.integers(), label='x')
    y = data.draw(st.integers(min_value=x + 1), label='x+1')
    z = data.draw(st.integers(max_value=x - 1), label='x-1')

    assert not are_values_different(x, x)
    assert are_values_different(x, y)
    assert are_values_different(x, z)
    assert are_values_different(y, z)


@given(st.data())
def test_diff_values_float(data):
    """Distinct floats are different; a float is the same as itself."""
    # 0.0 rather than 0: an int would differ from any float by type alone
    # and the value comparison would never be exercised
    x = data.draw(st.just(0.0), label='x')
    y = data.draw(st.floats(min_value=1e8), label='y')
    z = data.draw(st.floats(max_value=-1e8), label='z')

    assert not are_values_different(x, x)
    assert are_values_different(x, y)
    assert are_values_different(x, z)
    assert are_values_different(y, z)


@given(st.data())
def test_diff_values_mixed(data):
    """Values of different types are always different."""
    type_float = data.draw(st.floats(), label='float')
    type_int = data.draw(st.integers(), label='int')
    type_none = data.draw(st.none(), label='none')

    assert are_values_different(type_float, type_int)
    assert are_values_different(type_float, type_none)
    assert are_values_different(type_int, type_none)
    assert are_values_different(np.array([0]), 'hey')
    assert not are_values_different(type_none, type_none)


@given(st.data())
def test_diff_values_array(data):
    """Sequences and arrays with no element in common are different."""
    a = data.draw(st.lists(elements=st.integers(min_value=0), min_size=1))
    b = data.draw(st.lists(elements=st.integers(max_value=-1), min_size=1))
    c = data.draw(st.lists(elements=st.floats(min_value=1e8), min_size=1))
    d = data.draw(st.lists(elements=st.floats(max_value=-1e8), min_size=1))

    assert are_values_different(a, b)
    assert are_values_different(c, d)
    assert not are_values_different(a, a)

    # the same for real ndarrays: equal lengths so that the comparison is
    # elementwise, bounded integers so that they fit a 32-bit dtype
    size = data.draw(st.integers(min_value=1, max_value=10), label='size')
    non_negative = np.array(data.draw(st.lists(
        elements=st.integers(min_value=0, max_value=2 ** 31 - 1),
        min_size=size, max_size=size)))
    negative = np.array(data.draw(st.lists(
        elements=st.integers(min_value=-2 ** 31, max_value=-1),
        min_size=size, max_size=size)))

    assert are_values_different(non_negative, negative)
    assert not are_values_different(non_negative, non_negative)
fnames4, check_code=False) + assert_equal(stdout, "These files are identical.") + + + @script_test def test_nib_ls(): yield check_nib_ls_example4d @@ -150,6 +180,11 @@ def test_help(): assert_equal(stderr, '') +@script_test +def test_nib_diff(): + yield check_nib_diff_examples + + @script_test def test_nib_nifti_dx(): # Test nib-nifti-dx script diff --git a/setup.py b/setup.py index b0f5bc093c..27f85d3e99 100755 --- a/setup.py +++ b/setup.py @@ -119,6 +119,7 @@ def main(**extra_args): pjoin('bin', 'nib-nifti-dx'), pjoin('bin', 'nib-tck2trk'), pjoin('bin', 'nib-trk2tck'), + pjoin('bin', 'nib-diff'), ], cmdclass = cmdclass, **extra_args