Skip to content

ENH: Take advantage of IndexedGzipFile drop_handles flag #614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 22 additions & 19 deletions nibabel/arrayproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from .volumeutils import array_from_file, apply_read_scaling
from .fileslice import fileslice
from .keywordonly import kw_only_meth
from .openers import ImageOpener, HAVE_INDEXED_GZIP
from . import openers


"""This flag controls whether a new file handle is created every time an image
Expand All @@ -43,14 +43,18 @@
``True``, ``False``, or ``'auto'``.

If ``True``, a single file handle is created and used. If ``False``, a new
file handle is created every time the image is accessed. If ``'auto'``, and
the optional ``indexed_gzip`` dependency is present, a single file handle is
created and persisted. If ``indexed_gzip`` is not available, behaviour is the
same as if ``keep_file_open is False``.
file handle is created every time the image is accessed. For gzip files, if
``'auto'``, and the optional ``indexed_gzip`` dependency is present, a single
file handle is created and persisted. If ``indexed_gzip`` is not available,
behaviour is the same as if ``keep_file_open is False``.

If this is set to any other value, attempts to create an ``ArrayProxy`` without
specifying the ``keep_file_open`` flag will result in a ``ValueError`` being
raised.

.. warning:: Setting this flag to a value of ``'auto'`` will become deprecated
behaviour in version 2.4.0. Support for ``'auto'`` will be removed
in version 3.0.0.
"""
KEEP_FILE_OPEN_DEFAULT = False

Expand Down Expand Up @@ -187,9 +191,9 @@ def _should_keep_file_open(self, file_like, keep_file_open):

- If ``file_like`` is a file(-like) object, ``False`` is returned.
Otherwise, ``file_like`` is assumed to be a file name.
- if ``file_like`` ends with ``'gz'``, and the ``indexed_gzip``
library is available, ``True`` is returned.
- Otherwise, ``False`` is returned.
- If ``keep_file_open`` is ``auto``, and ``indexed_gzip`` is
not available, ``False`` is returned.
- Otherwise, the value of ``keep_file_open`` is returned unchanged.

Parameters
----------
Expand All @@ -203,23 +207,21 @@ def _should_keep_file_open(self, file_like, keep_file_open):
-------

The value of ``keep_file_open`` that will be used by this
``ArrayProxy``.
``ArrayProxy``, and passed through to ``ImageOpener`` instances.
"""
if keep_file_open is None:
keep_file_open = KEEP_FILE_OPEN_DEFAULT
# if keep_file_open is True/False, we do what the user wants us to do
if isinstance(keep_file_open, bool):
return keep_file_open
if keep_file_open != 'auto':
if keep_file_open not in ('auto', True, False):
raise ValueError('keep_file_open should be one of {None, '
'\'auto\', True, False}')

# file_like is a handle - keep_file_open is irrelevant
if hasattr(file_like, 'read') and hasattr(file_like, 'seek'):
return False
# Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set
# keep_file_open to True, else we set it to False
return HAVE_INDEXED_GZIP and file_like.endswith('gz')
# don't have indexed_gzip - auto -> False
if keep_file_open == 'auto' and not (openers.HAVE_INDEXED_GZIP and
file_like.endswith('.gz')):
return False
return keep_file_open

@property
@deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0')
Expand Down Expand Up @@ -265,10 +267,11 @@ def _get_fileobj(self):
"""
if self._keep_file_open:
if not hasattr(self, '_opener'):
self._opener = ImageOpener(self.file_like, keep_open=True)
self._opener = openers.ImageOpener(
self.file_like, keep_open=self._keep_file_open)
yield self._opener
else:
with ImageOpener(self.file_like, keep_open=False) as opener:
with openers.ImageOpener(self.file_like) as opener:
yield opener

def get_unscaled(self):
Expand Down
1 change: 1 addition & 0 deletions nibabel/benchmarks/bench_array_to_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from __future__ import division, print_function

import sys
from io import BytesIO # NOQA
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why #NOQA?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Style warning without it - BytesIO only gets used in evaled strings, so the style checker doesn't pick it up as being used.


import numpy as np

Expand Down
18 changes: 4 additions & 14 deletions nibabel/benchmarks/bench_arrayproxy_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
"""

from timeit import timeit
import contextlib
import gc
import itertools as it
import numpy as np
Expand Down Expand Up @@ -51,24 +50,14 @@
('?', '?', '?', ':'),
]

KEEP_OPENS = [False, True]
KEEP_OPENS = [False, True, 'auto']

if HAVE_INDEXED_GZIP:
HAVE_IGZIP = [False, True]
else:
HAVE_IGZIP = [False]


@contextlib.contextmanager
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can now remove contextlib import.

def patch_indexed_gzip(have_igzip):

atts = ['nibabel.openers.HAVE_INDEXED_GZIP',
'nibabel.arrayproxy.HAVE_INDEXED_GZIP']

with mock.patch(atts[0], have_igzip), mock.patch(atts[1], have_igzip):
yield


def bench_arrayproxy_slicing():

print_git_title('\nArrayProxy gzip slicing')
Expand Down Expand Up @@ -154,14 +143,15 @@ def fmt_sliceobj(sliceobj):
# load uncompressed and compressed versions of the image
img = nib.load(testfile, keep_file_open=keep_open)

with patch_indexed_gzip(have_igzip):
with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', have_igzip):
imggz = nib.load(testfilegz, keep_file_open=keep_open)

def basefunc():
img.dataobj[fix_sliceobj(sliceobj)]

def testfunc():
with patch_indexed_gzip(have_igzip):
with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP',
have_igzip):
imggz.dataobj[fix_sliceobj(sliceobj)]

# make sure nothing is floating around from the previous test
Expand Down
24 changes: 16 additions & 8 deletions nibabel/openers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,22 @@

# is indexed_gzip present and modern?
try:
from indexed_gzip import SafeIndexedGzipFile, __version__ as version
import indexed_gzip as igzip
version = igzip.__version__

HAVE_INDEXED_GZIP = True

if StrictVersion(version) < StrictVersion('0.6.0'):
# < 0.7 - no good
if StrictVersion(version) < StrictVersion('0.7.0'):
warnings.warn('indexed_gzip is present, but too old '
'(>= 0.6.0 required): {})'.format(version))
'(>= 0.7.0 required): {})'.format(version))
HAVE_INDEXED_GZIP = False

del version
# >= 0.8 SafeIndexedGzipFile renamed to IndexedGzipFile
elif StrictVersion(version) < StrictVersion('0.8.0'):
IndexedGzipFile = igzip.SafeIndexedGzipFile
else:
IndexedGzipFile = igzip.IndexedGzipFile
del igzip, version

except ImportError:
HAVE_INDEXED_GZIP = False
Expand Down Expand Up @@ -80,9 +86,11 @@ def readinto(self, buf):

def _gzip_open(filename, mode='rb', compresslevel=9, keep_open=False):

# use indexed_gzip if possible for faster read access
if keep_open and mode == 'rb' and HAVE_INDEXED_GZIP:
gzip_file = SafeIndexedGzipFile(filename)
# use indexed_gzip if possible for faster read access. If keep_open ==
# True, we tell IndexedGzipFile to keep the file handle open. Otherwise
# the IndexedGzipFile will close/open the file on each read.
if HAVE_INDEXED_GZIP and mode == 'rb':
gzip_file = IndexedGzipFile(filename, drop_handles=not keep_open)

# Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class
# defined above)
Expand Down
Loading