diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 18d7f67e3b..e95f519e02 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -265,10 +265,10 @@ def _get_fileobj(self): """ if self._keep_file_open: if not hasattr(self, '_opener'): - self._opener = ImageOpener(self.file_like) + self._opener = ImageOpener(self.file_like, keep_open=True) yield self._opener else: - with ImageOpener(self.file_like) as opener: + with ImageOpener(self.file_like, keep_open=False) as opener: yield opener def get_unscaled(self): diff --git a/nibabel/benchmarks/bench_array_to_file.py b/nibabel/benchmarks/bench_array_to_file.py index e627485bb9..f55b8a2583 100644 --- a/nibabel/benchmarks/bench_array_to_file.py +++ b/nibabel/benchmarks/bench_array_to_file.py @@ -19,11 +19,12 @@ import numpy as np - from .butils import print_git_title from numpy.testing import measure +from nibabel.volumeutils import array_to_file # NOQA + def bench_array_to_file(): rng = np.random.RandomState(20111001) diff --git a/nibabel/benchmarks/bench_arrayproxy_slicing.py b/nibabel/benchmarks/bench_arrayproxy_slicing.py new file mode 100644 index 0000000000..a824822d3c --- /dev/null +++ b/nibabel/benchmarks/bench_arrayproxy_slicing.py @@ -0,0 +1,202 @@ +"""Benchmarks for ArrayProxy slicing of gzipped and non-gzipped files + +Run benchmarks with:: + + import nibabel as nib + nib.bench() + +If you have doctests enabled by default in nose (with a noserc file or +environment variable), and you have a numpy version <= 1.6.1, this will also +run the doctests, let's hope they pass. + +Run this benchmark with: + + nosetests -s --match '(?:^|[\\b_\\.//-])[Bb]ench' /path/to/bench_arrayproxy_slicing.py +""" + +from timeit import timeit +import contextlib +import gc +import itertools as it +import numpy as np +import mock + +import nibabel as nib +from nibabel.tmpdirs import InTemporaryDirectory +from nibabel.openers import HAVE_INDEXED_GZIP + +from .butils import print_git_title +from ..rstutils import rst_table + +# if memory_profiler is installed, we get memory usage results +try: + from memory_profiler import memory_usage +except ImportError: + memory_usage = None + + +# Each test involves loading an image of shape SHAPE, and then slicing it +# NITERS times +NITERS = 50 +SHAPE = (100, 100, 100, 100) + +# One test is run for each combination of SLICEOBJS, KEEP_OPENS, and HAVE_IGZIP + +# ':' gets replaced with slice(None) +# '?' gets replaced with a random index into the relevant axis +# numbers (assumed to be between 0 and 1) get scaled to the axis shape +SLICEOBJS = [ + ('?', ':', ':', ':'), + (':', ':', ':', '?'), + ('?', '?', '?', ':'), +] + +KEEP_OPENS = [False, True] + +if HAVE_INDEXED_GZIP: + HAVE_IGZIP = [False, True] +else: + HAVE_IGZIP = [False] + + +@contextlib.contextmanager +def patch_indexed_gzip(have_igzip): + + atts = ['nibabel.openers.HAVE_INDEXED_GZIP', + 'nibabel.arrayproxy.HAVE_INDEXED_GZIP'] + + with mock.patch(atts[0], have_igzip), mock.patch(atts[1], have_igzip): + yield + + +def bench_arrayproxy_slicing(): + + print_git_title('\nArrayProxy gzip slicing') + + # each test is a tuple containing + # (HAVE_INDEXED_GZIP, keep_file_open, sliceobj) + tests = list(it.product(HAVE_IGZIP, KEEP_OPENS, SLICEOBJS)) + + # remove tests where HAVE_INDEXED_GZIP is True and keep_file_open is False, + # because if keep_file_open is False, HAVE_INDEXED_GZIP has no effect + tests = [t for t in tests if not (t[0] and not t[1])] + + testfile = 'testfile.nii' + testfilegz = 'test.nii.gz' + + def get_test_label(test): + have_igzip = test[0] + keep_open = test[1] + + if not (have_igzip and keep_open): + return 'gzip' + else: + return 'indexed_gzip' + + def fix_sliceobj(sliceobj): + new_sliceobj = [] + for i, s in enumerate(sliceobj): + if s == ':': + new_sliceobj.append(slice(None)) + elif s == '?': + new_sliceobj.append(np.random.randint(0, SHAPE[i])) + else: + new_sliceobj.append(int(s * SHAPE[i])) + return tuple(new_sliceobj) + + def fmt_sliceobj(sliceobj): + slcstr = [] + for i, s in enumerate(sliceobj): + if s in ':?': + slcstr.append(s) + else: + slcstr.append(str(int(s * SHAPE[i]))) + return '[{}]'.format(', '.join(slcstr)) + + with InTemporaryDirectory(): + + print('Generating test data... ({} MB)'.format( + int(round(np.prod(SHAPE) * 4 / 1048576.)))) + + data = np.array(np.random.random(SHAPE), dtype=np.float32) + + # zero out 10% of voxels so gzip has something to compress + mask = np.random.random(SHAPE[:3]) > 0.1 + if len(SHAPE) > 3: + data[mask, :] = 0 + else: + data[mask] = 0 + + # save uncompressed and compressed versions of the image + img = nib.nifti1.Nifti1Image(data, np.eye(4)) + nib.save(img, testfilegz) + nib.save(img, testfile) + + # each result is a tuple containing + # (label, keep_open, sliceobj, testtime, basetime, testmem, basemem) + # + # where "basetime" is the time taken to load and slice a memmapped + # (uncompressed)image, and "basemem" is memory usage for the same + results = [] + + # We use the same random seed for each slice object, + seeds = [np.random.randint(0, 2 ** 32) for s in SLICEOBJS] + + for ti, test in enumerate(tests): + + label = get_test_label(test) + have_igzip, keep_open, sliceobj = test + seed = seeds[SLICEOBJS.index(sliceobj)] + + print('Running test {} of {} ({})...'.format( + ti + 1, len(tests), label)) + + # load uncompressed and compressed versions of the image + img = nib.load(testfile, keep_file_open=keep_open) + + with patch_indexed_gzip(have_igzip): + imggz = nib.load(testfilegz, keep_file_open=keep_open) + + def basefunc(): + img.dataobj[fix_sliceobj(sliceobj)] + + def testfunc(): + with patch_indexed_gzip(have_igzip): + imggz.dataobj[fix_sliceobj(sliceobj)] + + # make sure nothing is floating around from the previous test + # iteration, so memory profiling is (hopefully) more accurate + gc.collect() + + if memory_usage is not None: + membaseline = max(memory_usage(lambda: None)) + testmem = max(memory_usage(testfunc)) - membaseline + basemem = max(memory_usage(basefunc)) - membaseline + else: + testmem = np.nan + basemem = np.nan + + # reset the random number generator, so test and baseline use the + # same slices + np.random.seed(seed) + testtime = float(timeit(testfunc, number=NITERS)) / float(NITERS) + np.random.seed(seed) + basetime = float(timeit(basefunc, number=NITERS)) / float(NITERS) + + results.append((label, keep_open, sliceobj, testtime, basetime, + testmem, basemem)) + + data = np.zeros((len(results), 4)) + data[:, 0] = [r[3] for r in results] + data[:, 1] = [r[4] for r in results] + try: + data[:, 2] = [r[3] / r[4] for r in results] + except: + data[:, 2] = np.nan + data[:, 3] = [r[5] - r[6] for r in results] + + rowlbls = ['Type {}, keep_open {}, slice {}'.format( + r[0], r[1], fmt_sliceobj(r[2])) for r in results] + collbls = ['Time', 'Baseline time', 'Time ratio', 'Memory deviation'] + + print(rst_table(data, rowlbls, collbls)) diff --git a/nibabel/benchmarks/bench_finite_range.py b/nibabel/benchmarks/bench_finite_range.py index 1d442ed379..5f268eb285 100644 --- a/nibabel/benchmarks/bench_finite_range.py +++ b/nibabel/benchmarks/bench_finite_range.py @@ -24,6 +24,8 @@ from numpy.testing import measure +from nibabel.volumeutils import finite_range # NOQA + def bench_finite_range(): rng = np.random.RandomState(20111001) diff --git a/nibabel/openers.py b/nibabel/openers.py index 04382f66bc..0f57fa406a 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -12,12 +12,23 @@ import bz2 import gzip import sys +import warnings from os.path import splitext +from distutils.version import StrictVersion -# is indexed_gzip present? +# is indexed_gzip present and modern? try: - from indexed_gzip import SafeIndexedGzipFile + from indexed_gzip import SafeIndexedGzipFile, __version__ as version + HAVE_INDEXED_GZIP = True + + if StrictVersion(version) < StrictVersion('0.6.0'): + warnings.warn('indexed_gzip is present, but too old ' + '(>= 0.6.0 required): {})'.format(version)) + HAVE_INDEXED_GZIP = False + + del version + except ImportError: HAVE_INDEXED_GZIP = False @@ -67,10 +78,10 @@ def readinto(self, buf): return n_read -def _gzip_open(filename, mode='rb', compresslevel=9): +def _gzip_open(filename, mode='rb', compresslevel=9, keep_open=False): # use indexed_gzip if possible for faster read access - if mode == 'rb' and HAVE_INDEXED_GZIP: + if keep_open and mode == 'rb' and HAVE_INDEXED_GZIP: gzip_file = SafeIndexedGzipFile(filename) # Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class @@ -101,12 +112,13 @@ class Opener(object): \*args : positional arguments passed to opening method when `fileish` is str. ``mode``, if not specified, is `rb`. ``compresslevel``, if relevant, and not specified, - is set from class variable ``default_compresslevel`` + is set from class variable ``default_compresslevel``. ``keep_open``, if + relevant, and not specified, is ``False``. \*\*kwargs : keyword arguments passed to opening method when `fileish` is str. Change of defaults as for \*args """ - gz_def = (_gzip_open, ('mode', 'compresslevel')) + gz_def = (_gzip_open, ('mode', 'compresslevel', 'keep_open')) bz2_def = (bz2.BZ2File, ('mode', 'buffering', 'compresslevel')) compress_ext_map = { '.gz': gz_def, @@ -132,8 +144,15 @@ def __init__(self, fileish, *args, **kwargs): # Set default mode if 'mode' not in full_kwargs: kwargs['mode'] = 'rb' + # Default compression level if 'compresslevel' in arg_names and 'compresslevel' not in kwargs: kwargs['compresslevel'] = self.default_compresslevel + # Default keep_open hint + if 'keep_open' in arg_names: + kwargs.setdefault('keep_open', False) + # Clear keep_open hint if it is not relevant for the file type + else: + kwargs.pop('keep_open', None) self.fobj = opener(fileish, *args, **kwargs) self._name = fileish self.me_opened = True diff --git a/nibabel/pkg_info.py b/nibabel/pkg_info.py index 11e22c8bdb..bc58c3bdc9 100644 --- a/nibabel/pkg_info.py +++ b/nibabel/pkg_info.py @@ -2,9 +2,9 @@ import sys import subprocess try: - from ConfigParser import ConfigParser + from ConfigParser import RawConfigParser as ConfigParser except ImportError: - from configparser import ConfigParser # python 3 + from configparser import RawConfigParser as ConfigParser # python 3 COMMIT_INFO_FNAME = 'COMMIT_INFO.txt' diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 61492f6038..70265a5860 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -33,6 +33,7 @@ from nibabel.testing import VIRAL_MEMMAP from .test_fileslice import slicer_samples +from .test_openers import patch_indexed_gzip class FunkyHeader(object): @@ -412,21 +413,6 @@ def test_keep_file_open_true_false_invalid(): ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='cauto') -@contextlib.contextmanager -def patch_indexed_gzip(state): - # Make it look like we do (state==True) or do not (state==False) have - # the indexed gzip module. - if state: - values = (True, True, gzip.GzipFile) - else: - values = (False, False, None) - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', values[0]), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', values[1]), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', values[2], - create=True): - yield - - @contextlib.contextmanager def patch_keep_file_open_default(value): # Patch arrayproxy.KEEP_FILE_OPEN_DEFAULT with the given value diff --git a/nibabel/tests/test_openers.py b/nibabel/tests/test_openers.py index 92c74f42e9..ebde721732 100644 --- a/nibabel/tests/test_openers.py +++ b/nibabel/tests/test_openers.py @@ -8,6 +8,7 @@ ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## ''' Test for openers module ''' import os +import contextlib from gzip import GzipFile from bz2 import BZ2File from io import BytesIO, UnsupportedOperation @@ -96,16 +97,31 @@ def test_BinOpener(): BinOpener, 'test.txt', 'r') +class MockIndexedGzipFile(object): + def __init__(self, *args, **kwargs): + pass + + +@contextlib.contextmanager +def patch_indexed_gzip(state): + # Make it look like we do (state==True) or do not (state==False) have + # the indexed gzip module. + if state: + values = (True, True, MockIndexedGzipFile) + else: + values = (False, False, GzipFile) + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', values[0]), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', values[1]), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', values[2], + create=True): + yield + + def test_Opener_gzip_type(): # Test that BufferedGzipFile or IndexedGzipFile are used as appropriate data = 'this is some test data' fname = 'test.gz' - mockmod = mock.MagicMock() - - class MockIGZFile(object): - def __init__(self, *args, **kwargs): - pass with InTemporaryDirectory(): @@ -113,21 +129,23 @@ def __init__(self, *args, **kwargs): with GzipFile(fname, mode='wb') as f: f.write(data.encode()) - # test with indexd_gzip not present - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, - create=True): - assert isinstance(Opener(fname, mode='rb').fobj, GzipFile) - assert isinstance(Opener(fname, mode='wb').fobj, GzipFile) - - # test with indexd_gzip present - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', MockIGZFile, - create=True): - assert isinstance(Opener(fname, mode='rb').fobj, MockIGZFile) - assert isinstance(Opener(fname, mode='wb').fobj, GzipFile) + # Each test is specified by a tuple containing: + # (indexed_gzip present, Opener kwargs, expected file type) + tests = [ + (False, {'mode' : 'rb', 'keep_open' : True}, GzipFile), + (False, {'mode' : 'rb', 'keep_open' : False}, GzipFile), + (False, {'mode' : 'wb', 'keep_open' : True}, GzipFile), + (False, {'mode' : 'wb', 'keep_open' : False}, GzipFile), + (True, {'mode' : 'rb', 'keep_open' : True}, MockIndexedGzipFile), + (True, {'mode' : 'rb', 'keep_open' : False}, GzipFile), + (True, {'mode' : 'wb', 'keep_open' : True}, GzipFile), + (True, {'mode' : 'wb', 'keep_open' : False}, GzipFile), + ] + + for test in tests: + igzip_present, kwargs, expected = test + with patch_indexed_gzip(igzip_present): + assert isinstance(Opener(fname, **kwargs).fobj, expected) class TestImageOpener: @@ -238,10 +256,11 @@ class StrictOpener(Opener): assert_true(isinstance(fobj.fobj, file_class)) elif lext == 'gz': try: - from indexed_gzip import IndexedGzipFile + from indexed_gzip import SafeIndexedGzipFile except ImportError: - IndexedGzipFile = GzipFile - assert_true(isinstance(fobj.fobj, (GzipFile, IndexedGzipFile))) + SafeIndexedGzipFile = GzipFile + assert_true(isinstance(fobj.fobj, (GzipFile, + SafeIndexedGzipFile))) else: assert_true(isinstance(fobj.fobj, BZ2File))