Skip to content

ENH: ArrayProxy reshape #521

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 6, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ Enhancements
(pr/495); function to concatenate multiple ArraySequence objects (pr/494)
* Support for numpy 1.12 (pr/500, pr/502) (MC, MB)
* Allow dtype specifiers as fileslice input (pr/485) (MB)
* Support "headerless" ArrayProxy specification, enabling memory-efficient
ArrayProxy reshaping (pr/521) (CM)

Bug fixes
---------
Expand All @@ -60,6 +62,10 @@ Maintenance
API changes and deprecations
----------------------------

* ``header`` argument to ``ArrayProxy.__init__`` is renamed to ``spec``
* Deprecation of ``header`` property of ``ArrayProxy`` object, for removal in
3.0


2.1 (Monday 22 August 2016)
===========================
Expand Down
82 changes: 65 additions & 17 deletions nibabel/arrayproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@

See :mod:`nibabel.tests.test_proxy_api` for proxy API conformance checks.
"""
import warnings

import numpy as np

from .deprecated import deprecate_with_version
from .volumeutils import array_from_file, apply_read_scaling
from .fileslice import fileslice
from .keywordonly import kw_only_meth
Expand All @@ -45,14 +44,17 @@ class ArrayProxy(object):
of the numpy dtypes, starting at a given file position ``offset`` with
single ``slope`` and ``intercept`` scaling to produce output values.

The class ``__init__`` requires a ``header`` object with methods:
The class ``__init__`` requires a spec which defines how the data will be
read and rescaled. The spec may be a tuple of length 2 - 5, containing the
shape, storage dtype, offset, slope and intercept, or a ``header`` object
with methods:

* get_data_shape
* get_data_dtype
* get_data_offset
* get_slope_inter

The header should also have a 'copy' method. This requirement will go away
A header should also have a 'copy' method. This requirement will go away
when the deprecated 'header' propoerty goes away.

This implementation allows us to deal with Analyze and its variants,
Expand All @@ -64,17 +66,32 @@ class ArrayProxy(object):
"""
# Assume Fortran array memory layout
order = 'F'
_header = None

@kw_only_meth(2)
def __init__(self, file_like, header, mmap=True):
def __init__(self, file_like, spec, mmap=True):
""" Initialize array proxy instance

Parameters
----------
file_like : object
File-like object or filename. If file-like object, should implement
at least ``read`` and ``seek``.
header : object
spec : object or tuple
Tuple must have length 2-5, with the following values.
- shape : tuple
tuple of ints describing shape of data
- storage_dtype : dtype specifier
dtype of array inside proxied file, or input to ``numpy.dtype``
to specify array dtype
- offset : int
Offset, in bytes, of data array from start of file
(default: 0)
- slope : float
Scaling factor for resulting data (default: 1.0)
- inter : float
Intercept for rescaled data (default: 0.0)
OR
Header object implementing ``get_data_shape``, ``get_data_dtype``,
``get_data_offset``, ``get_slope_inter``
mmap : {True, False, 'c', 'r'}, optional, keyword only
Expand All @@ -90,22 +107,30 @@ def __init__(self, file_like, header, mmap=True):
if mmap not in (True, False, 'c', 'r'):
raise ValueError("mmap should be one of {True, False, 'c', 'r'}")
self.file_like = file_like
if hasattr(spec, 'get_data_shape'):
slope, inter = spec.get_slope_inter()
par = (spec.get_data_shape(),
spec.get_data_dtype(),
spec.get_data_offset(),
1. if slope is None else slope,
0. if inter is None else inter)
# Reference to original header; we will remove this soon
self._header = spec.copy()
elif 2 <= len(spec) <= 5:
optional = (0, 1., 0.)
par = spec + optional[len(spec) - 2:]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice.

else:
raise TypeError('spec must be tuple of length 2-5 or header object')

# Copies of values needed to read array
self._shape = header.get_data_shape()
self._dtype = header.get_data_dtype()
self._offset = header.get_data_offset()
self._slope, self._inter = header.get_slope_inter()
self._slope = 1.0 if self._slope is None else self._slope
self._inter = 0.0 if self._inter is None else self._inter
self._shape, self._dtype, self._offset, self._slope, self._inter = par
# Permit any specifier that can be interpreted as a numpy dtype
self._dtype = np.dtype(self._dtype)
self._mmap = mmap
# Reference to original header; we will remove this soon
self._header = header.copy()

@property
@deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice.

def header(self):
warnings.warn('We will remove the header property from proxies soon',
FutureWarning,
stacklevel=2)
return self._header

@property
Expand Down Expand Up @@ -162,6 +187,29 @@ def __getitem__(self, slicer):
# Upcast as necessary for big slopes, intercepts
return apply_read_scaling(raw_data, self._slope, self._inter)

def reshape(self, shape):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One-liner docstring?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to allow -1 as element in shape, for compatibility with numpy array reshape.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

''' Return an ArrayProxy with a new shape, without modifying data '''
size = np.prod(self._shape)

# Calculate new shape if not fully specified
from operator import mul
from functools import reduce
n_unknowns = len([e for e in shape if e == -1])
if n_unknowns > 1:
raise ValueError("can only specify one unknown dimension")
elif n_unknowns == 1:
known_size = reduce(mul, shape, -1)
unknown_size = size // known_size
shape = tuple(unknown_size if e == -1 else e for e in shape)

if np.prod(shape) != size:
raise ValueError("cannot reshape array of size {:d} into shape "
"{!s}".format(size, shape))
return self.__class__(file_like=self.file_like,
spec=(shape, self._dtype, self._offset,
self._slope, self._inter),
mmap=self._mmap)


def is_proxy(obj):
""" Return True if `obj` is an array proxy
Expand Down
26 changes: 1 addition & 25 deletions nibabel/cifti2/parse_cifti2.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,33 +121,9 @@ def _chk_pixdims(hdr, fix=False):


class _Cifti2AsNiftiImage(Nifti2Image):
""" Load a NIfTI2 image with a Cifti2 header """
header_class = _Cifti2AsNiftiHeader
files_types = (('image', '.nii'),)
valid_exts = ('.nii',)
makeable = False
rw = True

def __init__(self, dataobj, affine, header=None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this is because the cifti_img attribute is not being used elsewhere?

Do we want to raise an error if no cifti extension found?

What is this subclass now for?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point of the subclass is to be a Nifti2Image with this header, which has different constraints from a standard Nifti2Header.

extra=None, file_map=None):
"""Convert NIFTI-2 file to CIFTI2"""
super(_Cifti2AsNiftiImage, self).__init__(dataobj=dataobj,
affine=affine,
header=header,
extra=extra,
file_map=file_map)

# Get cifti header from extension
for extension in self.header.extensions:
if isinstance(extension, Cifti2Extension):
self.cifti_img = extension
break
else:
self.cifti_img = None

if self.cifti_img is None:
raise ValueError('Nifti2 header does not contain a CIFTI2 '
'extension')
self.cifti_img.data = self.get_data()


class Cifti2Parser(xml.XmlParser):
Expand Down
6 changes: 3 additions & 3 deletions nibabel/cifti2/tests/test_cifti2io.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ def test_read_and_proxies():
assert_true(isinstance(img2.header, ci.Cifti2Header))
assert_equal(img2.shape, (1, 91282))
# While we cannot reshape arrayproxies, all images are in-memory
assert_true(img2.in_memory)
assert_true(not img2.in_memory)
data = img2.get_data()
assert_true(data is img2.dataobj)
assert_true(data is not img2.dataobj)
# Uncaching has no effect, images are always array images
img2.uncache()
assert_true(data is img2.get_data())
assert_true(data is not img2.get_data())


@needs_nibabel_data('nitest-cifti2')
Expand Down
49 changes: 49 additions & 0 deletions nibabel/tests/test_arrayproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,41 @@ def test_init():
bio.write(arr.tostring(order='C'))
ap = CArrayProxy(bio, FunkyHeader((2, 3, 4)))
assert_array_equal(np.asarray(ap), arr)
# Illegal init
assert_raises(TypeError, ArrayProxy, bio, object())


def test_tuplespec():
bio = BytesIO()
shape = [2, 3, 4]
dtype = np.int32
arr = np.arange(24, dtype=dtype).reshape(shape)
bio.seek(16)
bio.write(arr.tostring(order='F'))
# Create equivalent header and tuple specs
hdr = FunkyHeader(shape)
tuple_spec = (hdr.get_data_shape(), hdr.get_data_dtype(),
hdr.get_data_offset(), 1., 0.)
ap_header = ArrayProxy(bio, hdr)
ap_tuple = ArrayProxy(bio, tuple_spec)
# Header and tuple specs produce identical behavior
for prop in ('shape', 'dtype', 'offset', 'slope', 'inter', 'is_proxy'):
assert_equal(getattr(ap_header, prop), getattr(ap_tuple, prop))
for method, args in (('get_unscaled', ()), ('__array__', ()),
('__getitem__', ((0, 2, 1), ))
):
assert_array_equal(getattr(ap_header, method)(*args),
getattr(ap_tuple, method)(*args))
# Tuple-defined ArrayProxies have no header to store
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one raises a warning no? Need to protect with warnings context manager?

with warnings.catch_warnings():
assert_true(ap_tuple.header is None)
# Partial tuples of length 2-4 are also valid
for n in range(2, 5):
ArrayProxy(bio, tuple_spec[:n])
# Bad tuple lengths
assert_raises(TypeError, ArrayProxy, bio, ())
assert_raises(TypeError, ArrayProxy, bio, tuple_spec[:1])
assert_raises(TypeError, ArrayProxy, bio, tuple_spec + ('error',))


def write_raw_data(arr, hdr, fileobj):
Expand Down Expand Up @@ -185,6 +220,20 @@ def __array__(self):
assert_equal(arr.shape, shape)


def test_reshaped_is_proxy():
shape = (1, 2, 3, 4)
hdr = FunkyHeader(shape)
bio = BytesIO()
prox = ArrayProxy(bio, hdr)
assert_true(isinstance(prox.reshape((2, 3, 4)), ArrayProxy))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need tests for -1 element, errors for incorrect size, to many -1s.

minus1 = prox.reshape((2, -1, 4))
assert_true(isinstance(minus1, ArrayProxy))
assert_equal(minus1.shape, (2, 3, 4))
assert_raises(ValueError, prox.reshape, (-1, -1, 4))
assert_raises(ValueError, prox.reshape, (2, 3, 5))
assert_raises(ValueError, prox.reshape, (2, -1, 5))


def test_get_unscaled():
# Test fetch of raw array
class FunkyHeader2(FunkyHeader):
Expand Down
2 changes: 1 addition & 1 deletion nibabel/tests/test_proxy_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def validate_deprecated_header(self, pmaker, params):
# Header is a copy of original
assert_false(prox.header is hdr)
assert_equal(prox.header, hdr)
assert_equal(warns.pop(0).category, FutureWarning)
assert_equal(warns.pop(0).category, DeprecationWarning)


class TestSpm99AnalyzeProxyAPI(TestAnalyzeProxyAPI):
Expand Down