diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index e95f519e02..b3faa21a1f 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -34,7 +34,7 @@ from .volumeutils import array_from_file, apply_read_scaling from .fileslice import fileslice from .keywordonly import kw_only_meth -from .openers import ImageOpener, HAVE_INDEXED_GZIP +from . import openers """This flag controls whether a new file handle is created every time an image @@ -43,14 +43,18 @@ ``True``, ``False``, or ``'auto'``. If ``True``, a single file handle is created and used. If ``False``, a new -file handle is created every time the image is accessed. If ``'auto'``, and -the optional ``indexed_gzip`` dependency is present, a single file handle is -created and persisted. If ``indexed_gzip`` is not available, behaviour is the -same as if ``keep_file_open is False``. +file handle is created every time the image is accessed. For gzip files, if +``'auto'``, and the optional ``indexed_gzip`` dependency is present, a single +file handle is created and persisted. If ``indexed_gzip`` is not available, +behaviour is the same as if ``keep_file_open is False``. If this is set to any other value, attempts to create an ``ArrayProxy`` without specifying the ``keep_file_open`` flag will result in a ``ValueError`` being raised. + +.. warning:: Setting this flag to a value of ``'auto'`` will become deprecated + behaviour in version 2.4.0. Support for ``'auto'`` will be removed + in version 3.0.0. """ KEEP_FILE_OPEN_DEFAULT = False @@ -187,9 +191,9 @@ def _should_keep_file_open(self, file_like, keep_file_open): - If ``file_like`` is a file(-like) object, ``False`` is returned. Otherwise, ``file_like`` is assumed to be a file name. - - if ``file_like`` ends with ``'gz'``, and the ``indexed_gzip`` - library is available, ``True`` is returned. - - Otherwise, ``False`` is returned. + - If ``keep_file_open`` is ``auto``, and ``indexed_gzip`` is + not available, ``False`` is returned. + - Otherwise, the value of ``keep_file_open`` is returned unchanged. Parameters ---------- @@ -203,23 +207,21 @@ def _should_keep_file_open(self, file_like, keep_file_open): ------- The value of ``keep_file_open`` that will be used by this - ``ArrayProxy``. + ``ArrayProxy``, and passed through to ``ImageOpener`` instances. """ if keep_file_open is None: keep_file_open = KEEP_FILE_OPEN_DEFAULT - # if keep_file_open is True/False, we do what the user wants us to do - if isinstance(keep_file_open, bool): - return keep_file_open - if keep_file_open != 'auto': + if keep_file_open not in ('auto', True, False): raise ValueError('keep_file_open should be one of {None, ' '\'auto\', True, False}') - # file_like is a handle - keep_file_open is irrelevant if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): return False - # Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set - # keep_file_open to True, else we set it to False - return HAVE_INDEXED_GZIP and file_like.endswith('gz') + # don't have indexed_gzip - auto -> False + if keep_file_open == 'auto' and not (openers.HAVE_INDEXED_GZIP and + file_like.endswith('.gz')): + return False + return keep_file_open @property @deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0') @@ -265,10 +267,11 @@ def _get_fileobj(self): """ if self._keep_file_open: if not hasattr(self, '_opener'): - self._opener = ImageOpener(self.file_like, keep_open=True) + self._opener = openers.ImageOpener( + self.file_like, keep_open=self._keep_file_open) yield self._opener else: - with ImageOpener(self.file_like, keep_open=False) as opener: + with openers.ImageOpener(self.file_like) as opener: yield opener def get_unscaled(self): diff --git a/nibabel/benchmarks/bench_array_to_file.py b/nibabel/benchmarks/bench_array_to_file.py index f55b8a2583..36921a106a 100644 --- a/nibabel/benchmarks/bench_array_to_file.py +++ b/nibabel/benchmarks/bench_array_to_file.py @@ -16,6 +16,7 @@ from __future__ import division, print_function import sys +from io import BytesIO # NOQA import numpy as np diff --git a/nibabel/benchmarks/bench_arrayproxy_slicing.py b/nibabel/benchmarks/bench_arrayproxy_slicing.py index 321a0779d5..c880aa0700 100644 --- a/nibabel/benchmarks/bench_arrayproxy_slicing.py +++ b/nibabel/benchmarks/bench_arrayproxy_slicing.py @@ -15,7 +15,6 @@ """ from timeit import timeit -import contextlib import gc import itertools as it import numpy as np @@ -51,7 +50,7 @@ ('?', '?', '?', ':'), ] -KEEP_OPENS = [False, True] +KEEP_OPENS = [False, True, 'auto'] if HAVE_INDEXED_GZIP: HAVE_IGZIP = [False, True] @@ -59,16 +58,6 @@ HAVE_IGZIP = [False] -@contextlib.contextmanager -def patch_indexed_gzip(have_igzip): - - atts = ['nibabel.openers.HAVE_INDEXED_GZIP', - 'nibabel.arrayproxy.HAVE_INDEXED_GZIP'] - - with mock.patch(atts[0], have_igzip), mock.patch(atts[1], have_igzip): - yield - - def bench_arrayproxy_slicing(): print_git_title('\nArrayProxy gzip slicing') @@ -154,14 +143,15 @@ def fmt_sliceobj(sliceobj): # load uncompressed and compressed versions of the image img = nib.load(testfile, keep_file_open=keep_open) - with patch_indexed_gzip(have_igzip): + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', have_igzip): imggz = nib.load(testfilegz, keep_file_open=keep_open) def basefunc(): img.dataobj[fix_sliceobj(sliceobj)] def testfunc(): - with patch_indexed_gzip(have_igzip): + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', + have_igzip): imggz.dataobj[fix_sliceobj(sliceobj)] # make sure nothing is floating around from the previous test diff --git a/nibabel/openers.py b/nibabel/openers.py index 0f57fa406a..f64ab23b37 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -18,16 +18,22 @@ # is indexed_gzip present and modern? try: - from indexed_gzip import SafeIndexedGzipFile, __version__ as version + import indexed_gzip as igzip + version = igzip.__version__ HAVE_INDEXED_GZIP = True - if StrictVersion(version) < StrictVersion('0.6.0'): + # < 0.7 - no good + if StrictVersion(version) < StrictVersion('0.7.0'): warnings.warn('indexed_gzip is present, but too old ' - '(>= 0.6.0 required): {})'.format(version)) + '(>= 0.7.0 required): {})'.format(version)) HAVE_INDEXED_GZIP = False - - del version + # >= 0.8 SafeIndexedGzipFile renamed to IndexedGzipFile + elif StrictVersion(version) < StrictVersion('0.8.0'): + IndexedGzipFile = igzip.SafeIndexedGzipFile + else: + IndexedGzipFile = igzip.IndexedGzipFile + del igzip, version except ImportError: HAVE_INDEXED_GZIP = False @@ -80,9 +86,11 @@ def readinto(self, buf): def _gzip_open(filename, mode='rb', compresslevel=9, keep_open=False): - # use indexed_gzip if possible for faster read access - if keep_open and mode == 'rb' and HAVE_INDEXED_GZIP: - gzip_file = SafeIndexedGzipFile(filename) + # use indexed_gzip if possible for faster read access. If keep_open == + # True, we tell IndexedGzipFile to keep the file handle open. Otherwise + # the IndexedGzipFile will close/open the file on each read. + if HAVE_INDEXED_GZIP and mode == 'rb': + gzip_file = IndexedGzipFile(filename, drop_handles=not keep_open) # Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class # defined above) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 537ed8f87d..fd6cfd5a44 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -20,8 +20,7 @@ import numpy as np -from ..arrayproxy import (ArrayProxy, KEEP_FILE_OPEN_DEFAULT, is_proxy, - reshape_dataobj) +from ..arrayproxy import (ArrayProxy, is_proxy, reshape_dataobj) from ..openers import ImageOpener from ..nifti1 import Nifti1Header @@ -342,15 +341,20 @@ def check_mmap(hdr, offset, proxy_class, # An image opener class which counts how many instances of itself have been # created class CountingImageOpener(ImageOpener): - num_openers = 0 - def __init__(self, *args, **kwargs): - super(CountingImageOpener, self).__init__(*args, **kwargs) CountingImageOpener.num_openers += 1 +def _count_ImageOpeners(proxy, data, voxels): + CountingImageOpener.num_openers = 0 + for i in range(voxels.shape[0]): + x, y, z = [int(c) for c in voxels[i, :]] + assert proxy[x, y, z] == x * 100 + y * 10 + z + return CountingImageOpener.num_openers + + def test_keep_file_open_true_false_invalid(): # Test the behaviour of the keep_file_open __init__ flag, when it is set to # True or False. @@ -365,22 +369,15 @@ def test_keep_file_open_true_false_invalid(): # Test that ArrayProxy(keep_file_open=True) only creates one file # handle, and that ArrayProxy(keep_file_open=False) creates a file # handle on every data access. - with mock.patch('nibabel.arrayproxy.ImageOpener', CountingImageOpener): + with mock.patch('nibabel.openers.ImageOpener', CountingImageOpener): proxy_no_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=False) assert not proxy_no_kfp._keep_file_open - for i in range(voxels.shape[0]): - x , y, z = [int(c) for c in voxels[i, :]] - assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z - assert CountingImageOpener.num_openers == i + 1 - CountingImageOpener.num_openers = 0 + assert _count_ImageOpeners(proxy_no_kfp, data, voxels) == 10 proxy_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=True) assert proxy_kfp._keep_file_open - for i in range(voxels.shape[0]): - x , y, z = [int(c) for c in voxels[i, :]] - assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z - assert CountingImageOpener.num_openers == 1 + assert _count_ImageOpeners(proxy_kfp, data, voxels) == 1 del proxy_kfp del proxy_no_kfp # Test that the keep_file_open flag has no effect if an open file @@ -389,20 +386,15 @@ def test_keep_file_open_true_false_invalid(): for kfo in (True, False, 'auto'): proxy = ArrayProxy(fobj, ((10, 10, 10), dtype), keep_file_open=kfo) - if kfo == 'auto': - kfo = False - assert proxy._keep_file_open is kfo + assert proxy._keep_file_open is False for i in range(voxels.shape[0]): + x, y, z = [int(c) for c in voxels[i, :]] assert proxy[x, y, z] == x * 100 + y * 10 + z assert not fobj.closed del proxy assert not fobj.closed assert fobj.closed # Test invalid values of keep_file_open - with assert_raises(ValueError): - ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=0) - with assert_raises(ValueError): - ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=1) with assert_raises(ValueError): ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=55) with assert_raises(ValueError): @@ -411,69 +403,99 @@ def test_keep_file_open_true_false_invalid(): ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='cauto') -@contextlib.contextmanager -def patch_keep_file_open_default(value): - # Patch arrayproxy.KEEP_FILE_OPEN_DEFAULT with the given value - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', value): - yield - - def test_keep_file_open_auto(): # Test the behaviour of the keep_file_open __init__ flag, when it is set to - # 'auto' + # 'auto'. + # if indexed_gzip is present, the ArrayProxy should persist its ImageOpener. + # Otherwise the ArrayProxy should drop openers. dtype = np.float32 - data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) + data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) + voxels = np.random.randint(0, 10, (10, 3)) with InTemporaryDirectory(): fname = 'testdata.gz' with gzip.open(fname, 'wb') as fobj: fobj.write(data.tostring(order='F')) - # If have_indexed_gzip, then keep_file_open should be True - with patch_indexed_gzip(True): + # If have_indexed_gzip, then the arrayproxy should create one + # ImageOpener + with patch_indexed_gzip(True), \ + mock.patch('nibabel.openers.ImageOpener', CountingImageOpener): + CountingImageOpener.num_openers = 0 proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') - assert proxy._keep_file_open + assert proxy._keep_file_open == 'auto' + assert _count_ImageOpeners(proxy, data, voxels) == 1 # If no have_indexed_gzip, then keep_file_open should be False - with patch_indexed_gzip(False): + with patch_indexed_gzip(False), \ + mock.patch('nibabel.openers.ImageOpener', CountingImageOpener): + CountingImageOpener.num_openers = 0 + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open='auto') + assert proxy._keep_file_open is False + assert _count_ImageOpeners(proxy, data, voxels) == 10 + # If not a gzip file, keep_file_open should be False + fname = 'testdata' + with open(fname, 'wb') as fobj: + fobj.write(data.tostring(order='F')) + # regardless of whether indexed_gzip is present or not + with patch_indexed_gzip(True), \ + mock.patch('nibabel.openers.ImageOpener', CountingImageOpener): + CountingImageOpener.num_openers = 0 + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open='auto') + assert proxy._keep_file_open is False + assert _count_ImageOpeners(proxy, data, voxels) == 10 + with patch_indexed_gzip(False), \ + mock.patch('nibabel.openers.ImageOpener', CountingImageOpener): + CountingImageOpener.num_openers = 0 proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') - assert not proxy._keep_file_open + assert proxy._keep_file_open is False + assert _count_ImageOpeners(proxy, data, voxels) == 10 + + +@contextlib.contextmanager +def patch_keep_file_open_default(value): + # Patch arrayproxy.KEEP_FILE_OPEN_DEFAULT with the given value + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', value): + yield def test_keep_file_open_default(): # Test the behaviour of the keep_file_open __init__ flag, when the # arrayproxy.KEEP_FILE_OPEN_DEFAULT value is changed dtype = np.float32 - data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) + data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) with InTemporaryDirectory(): fname = 'testdata.gz' with gzip.open(fname, 'wb') as fobj: fobj.write(data.tostring(order='F')) - # The default value of KEEP_FILE_OPEN_DEFAULT should cause - # keep_file_open to be False, regardless of whether or not indexed_gzip - # is present - assert KEEP_FILE_OPEN_DEFAULT is False - with patch_indexed_gzip(False): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert not proxy._keep_file_open - with patch_indexed_gzip(True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert not proxy._keep_file_open - # KEEP_FILE_OPEN_DEFAULT=True should cause keep_file_open to be True, - # regardless of whether or not indexed_gzip is present - with patch_keep_file_open_default(True), patch_indexed_gzip(True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert proxy._keep_file_open - with patch_keep_file_open_default(True), patch_indexed_gzip(False): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert proxy._keep_file_open - # KEEP_FILE_OPEN_DEFAULT=auto should cause keep_file_open to be True - # or False, depending on whether indeed_gzip is present, - with patch_keep_file_open_default('auto'), patch_indexed_gzip(True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert proxy._keep_file_open - with patch_keep_file_open_default('auto'), patch_indexed_gzip(False): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert not proxy._keep_file_open + # If KEEP_FILE_OPEN_DEFAULT is False, ArrayProxy instances should + # interpret keep_file_open as False + with patch_keep_file_open_default(False): + with patch_indexed_gzip(False): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open is False + with patch_indexed_gzip(True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open is False + # If KEEP_FILE_OPEN_DEFAULT is True, ArrayProxy instances should + # interpret keep_file_open as True + with patch_keep_file_open_default(True): + with patch_indexed_gzip(False): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open is True + with patch_indexed_gzip(True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open is True + # If KEEP_FILE_OPEN_DEFAULT is auto, ArrayProxy instances should + # interpret it as auto if indexed_gzip is present, False otherwise. + with patch_keep_file_open_default('auto'): + with patch_indexed_gzip(False): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open is False + with patch_indexed_gzip(True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open == 'auto' # KEEP_FILE_OPEN_DEFAULT=any other value should cuse an error to be # raised with patch_keep_file_open_default('badvalue'): diff --git a/nibabel/tests/test_openers.py b/nibabel/tests/test_openers.py index ebde721732..ca1654bf9a 100644 --- a/nibabel/tests/test_openers.py +++ b/nibabel/tests/test_openers.py @@ -12,9 +12,10 @@ from gzip import GzipFile from bz2 import BZ2File from io import BytesIO, UnsupportedOperation +from distutils.version import StrictVersion from ..py3k import asstr, asbytes -from ..openers import Opener, ImageOpener +from ..openers import Opener, ImageOpener, HAVE_INDEXED_GZIP from ..tmpdirs import InTemporaryDirectory from ..volumeutils import BinOpener @@ -67,6 +68,8 @@ def test_Opener_various(): # Check we can do all sorts of files here message = b"Oh what a giveaway" bz2_fileno = hasattr(BZ2File, 'fileno') + if HAVE_INDEXED_GZIP: + import indexed_gzip as igzip with InTemporaryDirectory(): sobj = BytesIO() for input in ('test.txt', @@ -86,6 +89,11 @@ def test_Opener_various(): assert_raises(UnsupportedOperation, fobj.fileno) elif input.endswith('.bz2') and not bz2_fileno: assert_raises(AttributeError, fobj.fileno) + # indexed gzip is used by default, and drops file + # handles by default, so we don't have a fileno. + elif input.endswith('gz') and HAVE_INDEXED_GZIP and \ + StrictVersion(igzip.__version__) >= StrictVersion('0.7.0'): + assert_raises(igzip.NoHandleError, fobj.fileno) else: # Just check there is a fileno assert_not_equal(fobj.fileno(), 0) @@ -97,9 +105,10 @@ def test_BinOpener(): BinOpener, 'test.txt', 'r') -class MockIndexedGzipFile(object): +class MockIndexedGzipFile(GzipFile): def __init__(self, *args, **kwargs): - pass + kwargs.pop('drop_handles', False) + super(MockIndexedGzipFile, self).__init__(*args, **kwargs) @contextlib.contextmanager @@ -107,12 +116,11 @@ def patch_indexed_gzip(state): # Make it look like we do (state==True) or do not (state==False) have # the indexed gzip module. if state: - values = (True, True, MockIndexedGzipFile) + values = (True, MockIndexedGzipFile) else: - values = (False, False, GzipFile) + values = (False, GzipFile) with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', values[0]), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', values[1]), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', values[2], + mock.patch('nibabel.openers.IndexedGzipFile', values[1], create=True): yield @@ -132,14 +140,18 @@ def test_Opener_gzip_type(): # Each test is specified by a tuple containing: # (indexed_gzip present, Opener kwargs, expected file type) tests = [ - (False, {'mode' : 'rb', 'keep_open' : True}, GzipFile), - (False, {'mode' : 'rb', 'keep_open' : False}, GzipFile), - (False, {'mode' : 'wb', 'keep_open' : True}, GzipFile), - (False, {'mode' : 'wb', 'keep_open' : False}, GzipFile), - (True, {'mode' : 'rb', 'keep_open' : True}, MockIndexedGzipFile), - (True, {'mode' : 'rb', 'keep_open' : False}, GzipFile), - (True, {'mode' : 'wb', 'keep_open' : True}, GzipFile), - (True, {'mode' : 'wb', 'keep_open' : False}, GzipFile), + (False, {'mode' : 'rb', 'keep_open' : True}, GzipFile), + (False, {'mode' : 'rb', 'keep_open' : False}, GzipFile), + (False, {'mode' : 'rb', 'keep_open' : 'auto'}, GzipFile), + (False, {'mode' : 'wb', 'keep_open' : True}, GzipFile), + (False, {'mode' : 'wb', 'keep_open' : False}, GzipFile), + (False, {'mode' : 'wb', 'keep_open' : 'auto'}, GzipFile), + (True, {'mode' : 'rb', 'keep_open' : True}, MockIndexedGzipFile), + (True, {'mode' : 'rb', 'keep_open' : False}, MockIndexedGzipFile), + (True, {'mode' : 'rb', 'keep_open' : 'auto'}, MockIndexedGzipFile), + (True, {'mode' : 'wb', 'keep_open' : True}, GzipFile), + (True, {'mode' : 'wb', 'keep_open' : False}, GzipFile), + (True, {'mode' : 'wb', 'keep_open' : 'auto'}, GzipFile), ] for test in tests: @@ -256,11 +268,10 @@ class StrictOpener(Opener): assert_true(isinstance(fobj.fobj, file_class)) elif lext == 'gz': try: - from indexed_gzip import SafeIndexedGzipFile + from ..openers import IndexedGzipFile except ImportError: - SafeIndexedGzipFile = GzipFile - assert_true(isinstance(fobj.fobj, (GzipFile, - SafeIndexedGzipFile))) + IndexedGzipFile = GzipFile + assert_true(isinstance(fobj.fobj, (GzipFile, IndexedGzipFile))) else: assert_true(isinstance(fobj.fobj, BZ2File))