Skip to content

Only use indexed_gzip when explicitly requested #562

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 6, 2017
4 changes: 2 additions & 2 deletions nibabel/arrayproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,10 +265,10 @@ def _get_fileobj(self):
"""
if self._keep_file_open:
if not hasattr(self, '_opener'):
self._opener = ImageOpener(self.file_like)
self._opener = ImageOpener(self.file_like, keep_open=True)
yield self._opener
else:
with ImageOpener(self.file_like) as opener:
with ImageOpener(self.file_like, keep_open=False) as opener:
yield opener

def get_unscaled(self):
Expand Down
3 changes: 2 additions & 1 deletion nibabel/benchmarks/bench_array_to_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@

import numpy as np


from .butils import print_git_title

from numpy.testing import measure

from nibabel.volumeutils import array_to_file # NOQA


def bench_array_to_file():
rng = np.random.RandomState(20111001)
Expand Down
202 changes: 202 additions & 0 deletions nibabel/benchmarks/bench_arrayproxy_slicing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""Benchmarks for ArrayProxy slicing of gzipped and non-gzipped files

Run benchmarks with::

import nibabel as nib
nib.bench()

If you have doctests enabled by default in nose (with a noserc file or
environment variable), and you have a numpy version <= 1.6.1, this will also
run the doctests, let's hope they pass.

Run this benchmark with::

nosetests -s --match '(?:^|[\\b_\\.//-])[Bb]ench' /path/to/bench_arrayproxy_slicing.py
"""

from timeit import timeit
import contextlib
import gc
import itertools as it
import numpy as np
import mock

import nibabel as nib
from nibabel.tmpdirs import InTemporaryDirectory
from nibabel.openers import HAVE_INDEXED_GZIP

from .butils import print_git_title
from ..rstutils import rst_table

# if memory_profiler is installed, we get memory usage results
try:
from memory_profiler import memory_usage
except ImportError:
memory_usage = None


# Every benchmark case loads an image of shape SHAPE and then slices it
# NITERS times.
NITERS = 50
SHAPE = (100, 100, 100, 100)

# Benchmark cases are built from the combinations of SLICEOBJS, KEEP_OPENS
# and HAVE_IGZIP.
#
# Within a slice object:
#   ':'      is replaced with slice(None)
#   '?'      is replaced with a random index into the relevant axis
#   numbers  (assumed to be between 0 and 1) are scaled to the axis shape
SLICEOBJS = [
    ('?', ':', ':', ':'),
    (':', ':', ':', '?'),
    ('?', '?', '?', ':'),
]

KEEP_OPENS = [False, True]

# The indexed_gzip variant is only included when HAVE_INDEXED_GZIP is set
HAVE_IGZIP = [False, True] if HAVE_INDEXED_GZIP else [False]


@contextlib.contextmanager
def patch_indexed_gzip(have_igzip):
    """Temporarily force the ``HAVE_INDEXED_GZIP`` flags to `have_igzip`.

    While the context is active, the flag is patched in both
    ``nibabel.openers`` and ``nibabel.arrayproxy``.
    """
    openers_flag = 'nibabel.openers.HAVE_INDEXED_GZIP'
    arrayproxy_flag = 'nibabel.arrayproxy.HAVE_INDEXED_GZIP'

    openers_patch = mock.patch(openers_flag, have_igzip)
    arrayproxy_patch = mock.patch(arrayproxy_flag, have_igzip)

    with openers_patch, arrayproxy_patch:
        yield


def bench_arrayproxy_slicing():
    """Benchmark ArrayProxy slicing of a gzipped image against a baseline.

    A random float32 image of shape ``SHAPE`` is saved, inside a temporary
    directory, both uncompressed and gzipped.  For every retained combination
    of ``HAVE_IGZIP``, ``KEEP_OPENS`` and ``SLICEOBJS``, the gzipped image is
    sliced ``NITERS`` times and timed; the uncompressed image is sliced with
    the same random indices to provide the baseline.  If ``memory_profiler``
    could be imported, memory usage of one slicing call is measured as well.

    The results are printed as an RST table; nothing is returned.
    """

    print_git_title('\nArrayProxy gzip slicing')

    # each test is a tuple containing
    # (HAVE_INDEXED_GZIP, keep_file_open, sliceobj)
    tests = list(it.product(HAVE_IGZIP, KEEP_OPENS, SLICEOBJS))

    # remove tests where HAVE_INDEXED_GZIP is True and keep_file_open is False,
    # because if keep_file_open is False, HAVE_INDEXED_GZIP has no effect
    tests = [t for t in tests if not (t[0] and not t[1])]

    testfile = 'testfile.nii'
    testfilegz = 'test.nii.gz'

    def get_test_label(test):
        # indexed_gzip is only in play when it is both available and the
        # file is kept open
        have_igzip = test[0]
        keep_open = test[1]

        if not (have_igzip and keep_open):
            return 'gzip'
        else:
            return 'indexed_gzip'

    def fix_sliceobj(sliceobj):
        # turn a symbolic slice description into a real index tuple
        new_sliceobj = []
        for i, s in enumerate(sliceobj):
            if s == ':':
                new_sliceobj.append(slice(None))
            elif s == '?':
                new_sliceobj.append(np.random.randint(0, SHAPE[i]))
            else:
                new_sliceobj.append(int(s * SHAPE[i]))
        return tuple(new_sliceobj)

    def fmt_sliceobj(sliceobj):
        # human-readable form of a symbolic slice description
        slcstr = []
        for i, s in enumerate(sliceobj):
            # compare against a tuple, not the string ':?': a substring test
            # raises TypeError for numeric entries and matches ''
            if s in (':', '?'):
                slcstr.append(s)
            else:
                slcstr.append(str(int(s * SHAPE[i])))
        return '[{}]'.format(', '.join(slcstr))

    with InTemporaryDirectory():

        print('Generating test data... ({} MB)'.format(
            int(round(np.prod(SHAPE) * 4 / 1048576.))))

        data = np.array(np.random.random(SHAPE), dtype=np.float32)

        # zero out roughly 90% of voxels (those where the random mask
        # exceeds 0.1) so gzip has something to compress
        mask = np.random.random(SHAPE[:3]) > 0.1
        if len(SHAPE) > 3:
            data[mask, :] = 0
        else:
            data[mask] = 0

        # save uncompressed and compressed versions of the image
        img = nib.nifti1.Nifti1Image(data, np.eye(4))
        nib.save(img, testfilegz)
        nib.save(img, testfile)

        # each result is a tuple containing
        # (label, keep_open, sliceobj, testtime, basetime, testmem, basemem)
        #
        # where "basetime" is the time taken to load and slice a memmapped
        # (uncompressed) image, and "basemem" is memory usage for the same
        results = []

        # One random seed per slice object, so that every test using a given
        # slice object (and its baseline) draws the same random indices.
        seeds = [np.random.randint(0, 2 ** 32) for _ in SLICEOBJS]

        for ti, test in enumerate(tests):

            label = get_test_label(test)
            have_igzip, keep_open, sliceobj = test
            seed = seeds[SLICEOBJS.index(sliceobj)]

            print('Running test {} of {} ({})...'.format(
                ti + 1, len(tests), label))

            # load uncompressed and compressed versions of the image
            img = nib.load(testfile, keep_file_open=keep_open)

            with patch_indexed_gzip(have_igzip):
                imggz = nib.load(testfilegz, keep_file_open=keep_open)

            def basefunc():
                img.dataobj[fix_sliceobj(sliceobj)]

            def testfunc():
                with patch_indexed_gzip(have_igzip):
                    imggz.dataobj[fix_sliceobj(sliceobj)]

            # make sure nothing is floating around from the previous test
            # iteration, so memory profiling is (hopefully) more accurate
            gc.collect()

            if memory_usage is not None:
                membaseline = max(memory_usage(lambda: None))
                testmem = max(memory_usage(testfunc)) - membaseline
                basemem = max(memory_usage(basefunc)) - membaseline
            else:
                testmem = np.nan
                basemem = np.nan

            # reset the random number generator, so test and baseline use the
            # same slices
            np.random.seed(seed)
            testtime = float(timeit(testfunc, number=NITERS)) / float(NITERS)
            np.random.seed(seed)
            basetime = float(timeit(basefunc, number=NITERS)) / float(NITERS)

            results.append((label, keep_open, sliceobj, testtime, basetime,
                            testmem, basemem))

    table = np.zeros((len(results), 4))
    table[:, 0] = [r[3] for r in results]
    table[:, 1] = [r[4] for r in results]
    # a zero baseline time yields NaN for that row only, instead of a bare
    # ``except`` blanking the whole column
    table[:, 2] = [r[3] / r[4] if r[4] else np.nan for r in results]
    table[:, 3] = [r[5] - r[6] for r in results]

    rowlbls = ['Type {}, keep_open {}, slice {}'.format(
        r[0], r[1], fmt_sliceobj(r[2])) for r in results]
    collbls = ['Time', 'Baseline time', 'Time ratio', 'Memory deviation']

    print(rst_table(table, rowlbls, collbls))
2 changes: 2 additions & 0 deletions nibabel/benchmarks/bench_finite_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

from numpy.testing import measure

from nibabel.volumeutils import finite_range # NOQA


def bench_finite_range():
rng = np.random.RandomState(20111001)
Expand Down
31 changes: 25 additions & 6 deletions nibabel/openers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,23 @@
import bz2
import gzip
import sys
import warnings
from os.path import splitext
from distutils.version import StrictVersion

# is indexed_gzip present?
# is indexed_gzip present and modern?
try:
    from indexed_gzip import SafeIndexedGzipFile, __version__ as version

    HAVE_INDEXED_GZIP = True

    # an installed but too-old indexed_gzip is treated as if it were absent
    if StrictVersion(version) < StrictVersion('0.6.0'):
        warnings.warn('indexed_gzip is present, but too old '
                      '(>= 0.6.0 required): {}'.format(version))
        HAVE_INDEXED_GZIP = False

    # do not leave the temporary name in the module namespace
    del version

except ImportError:
    HAVE_INDEXED_GZIP = False

Expand Down Expand Up @@ -67,10 +78,10 @@ def readinto(self, buf):
return n_read


def _gzip_open(filename, mode='rb', compresslevel=9):
def _gzip_open(filename, mode='rb', compresslevel=9, keep_open=False):

# use indexed_gzip if possible for faster read access
if mode == 'rb' and HAVE_INDEXED_GZIP:
if keep_open and mode == 'rb' and HAVE_INDEXED_GZIP:
gzip_file = SafeIndexedGzipFile(filename)

# Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class
Expand Down Expand Up @@ -101,12 +112,13 @@ class Opener(object):
\*args : positional arguments
passed to opening method when `fileish` is str. ``mode``, if not
specified, is `rb`. ``compresslevel``, if relevant, and not specified,
is set from class variable ``default_compresslevel``
is set from class variable ``default_compresslevel``. ``keep_open``, if
relevant, and not specified, is ``False``.
\*\*kwargs : keyword arguments
passed to opening method when `fileish` is str. Change of defaults as
for \*args
"""
gz_def = (_gzip_open, ('mode', 'compresslevel'))
gz_def = (_gzip_open, ('mode', 'compresslevel', 'keep_open'))
bz2_def = (bz2.BZ2File, ('mode', 'buffering', 'compresslevel'))
compress_ext_map = {
'.gz': gz_def,
Expand All @@ -132,8 +144,15 @@ def __init__(self, fileish, *args, **kwargs):
# Set default mode
if 'mode' not in full_kwargs:
kwargs['mode'] = 'rb'
# Default compression level
if 'compresslevel' in arg_names and 'compresslevel' not in kwargs:
kwargs['compresslevel'] = self.default_compresslevel
# Default keep_open hint
if 'keep_open' in arg_names:
kwargs.setdefault('keep_open', False)
# Clear keep_open hint if it is not relevant for the file type
else:
kwargs.pop('keep_open', None)
self.fobj = opener(fileish, *args, **kwargs)
self._name = fileish
self.me_opened = True
Expand Down
4 changes: 2 additions & 2 deletions nibabel/pkg_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import sys
import subprocess
try:
from ConfigParser import ConfigParser
from ConfigParser import RawConfigParser as ConfigParser
except ImportError:
from configparser import ConfigParser # python 3
from configparser import RawConfigParser as ConfigParser # python 3

COMMIT_INFO_FNAME = 'COMMIT_INFO.txt'

Expand Down
16 changes: 1 addition & 15 deletions nibabel/tests/test_arrayproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from nibabel.testing import VIRAL_MEMMAP

from .test_fileslice import slicer_samples
from .test_openers import patch_indexed_gzip


class FunkyHeader(object):
Expand Down Expand Up @@ -412,21 +413,6 @@ def test_keep_file_open_true_false_invalid():
ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='cauto')


@contextlib.contextmanager
def patch_indexed_gzip(state):
    """Pretend indexed_gzip is available (`state` true) or not (`state` false).

    Patches the ``HAVE_INDEXED_GZIP`` flags in ``nibabel.openers`` and
    ``nibabel.arrayproxy``, and ``nibabel.openers.SafeIndexedGzipFile``
    (created if missing), for the duration of the context.
    """
    if state:
        have_flag, igzip_cls = True, gzip.GzipFile
    else:
        have_flag, igzip_cls = False, None

    openers_flag = mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', have_flag)
    arrayproxy_flag = mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP',
                                 have_flag)
    igzip_class = mock.patch('nibabel.openers.SafeIndexedGzipFile', igzip_cls,
                             create=True)

    with openers_flag, arrayproxy_flag, igzip_class:
        yield


@contextlib.contextmanager
def patch_keep_file_open_default(value):
# Patch arrayproxy.KEEP_FILE_OPEN_DEFAULT with the given value
Expand Down
Loading