Skip to content

FIX: Accept dtype parameter to ArrayProxy.__array__ #844

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions Changelog
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@ References like "pr/298" refer to github pull request numbers.

New features
------------
* ArrayProxy method ``get_scaled()`` scales data with a dtype of a
specified precision, promoting as necessary to avoid overflow. This
is used in ``img.get_fdata()`` to control memory usage. (pr/833)
(CM, reviewed by Ross Markello)
* ArrayProxy ``__array__()`` now accepts a ``dtype`` parameter, allowing
``numpy.array(dataobj, dtype=...)`` calls, as well as casting directly
with a dtype (for example, ``numpy.float32(dataobj)``) to control the
output type. Scale factors (slope, intercept) are applied, but may be
cast to narrower types, to control memory usage. This is now the basis
of ``img.get_fdata()``, which will scale data in single precision if
the output type is ``float32``. (pr/844) (CM, reviewed by ...)
* GiftiImage method ``agg_data()`` to return usable data arrays (pr/793)
(Hao-Ting Wang, reviewed by CM)
* Accept ``os.PathLike`` objects in place of filenames (pr/610) (Cameron
Expand Down
34 changes: 15 additions & 19 deletions nibabel/arrayproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,35 +378,31 @@ def get_unscaled(self):
"""
return self._get_unscaled(slicer=())

def get_scaled(self, dtype=None):
""" Read data from file and apply scaling
def __array__(self, dtype=None):
""" Read data from file and apply scaling, casting to ``dtype``

The dtype of the returned array is the narrowest dtype that can
represent the data without overflow, and is at least as wide as
the dtype parameter.
If ``dtype`` is unspecified, the dtype of the returned array is the
narrowest dtype that can represent the data without overflow.
Generally, it is the wider of the dtypes of the slopes or intercepts.

If dtype is unspecified, it is the wider of the dtypes of the slope
or intercept. This will generally be determined by the parameter
size in the image header, and so should be consistent for a given
image format, but may vary across formats. Notably, these factors
are single-precision (32-bit) floats for NIfTI-1 and double-precision
(64-bit) floats for NIfTI-2.
The types of the scale factors will generally be determined by the
parameter size in the image header, and so should be consistent for a
given image format, but may vary across formats.

Parameters
----------
dtype : numpy dtype specifier
A numpy dtype specifier specifying the narrowest acceptable
dtype.
dtype : numpy dtype specifier, optional
A numpy dtype specifier specifying the type of the returned array.

Returns
-------
array
Scaled image data of data type `dtype`.
Scaled image data with type `dtype`.
"""
return self._get_scaled(dtype=dtype, slicer=())

def __array__(self):
return self._get_scaled(dtype=None, slicer=())
arr = self._get_scaled(dtype=dtype, slicer=())
if dtype is not None:
arr = arr.astype(dtype, copy=False)
return arr

def __getitem__(self, slicer):
return self._get_scaled(dtype=None, slicer=slicer)
Expand Down
11 changes: 3 additions & 8 deletions nibabel/dataobj_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

import numpy as np

from .arrayproxy import is_proxy
from .filebasedimages import FileBasedImage
from .keywordonly import kw_only_meth
from .deprecated import deprecate_with_version
Expand Down Expand Up @@ -351,14 +350,10 @@ def get_fdata(self, caching='fill', dtype=np.float64):
if self._fdata_cache is not None:
if self._fdata_cache.dtype.type == dtype.type:
return self._fdata_cache
dataobj = self._dataobj
# Attempt to confine data array to dtype during scaling
# On overflow, may still upcast
if is_proxy(dataobj):
dataobj = dataobj.get_scaled(dtype=dtype)
# Always return requested data type
# For array proxies, will only copy on overflow
data = np.asanyarray(dataobj, dtype=dtype)
# For array proxies, will attempt to confine data array to dtype
# during scaling
data = np.asanyarray(self._dataobj, dtype=dtype)
if caching == 'fill':
self._fdata_cache = data
return data
Expand Down
41 changes: 14 additions & 27 deletions nibabel/ecat.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,46 +689,33 @@ def ndim(self):
def is_proxy(self):
return True

def __array__(self):
def __array__(self, dtype=None):
''' Read data from file

This reads ALL FRAMES into one array, can be memory expensive.

If you want to read only some slices, use the slicing syntax
(``__getitem__``) below, or ``subheader.data_from_fileobj(frame)``
'''
data = np.empty(self.shape)
frame_mapping = get_frame_order(self._subheader._mlist)
for i in sorted(frame_mapping):
data[:, :, :, i] = self._subheader.data_from_fileobj(
frame_mapping[i][0])
return data

def get_scaled(self, dtype=None):
""" Read data from file and apply scaling

The dtype of the returned array is the narrowest dtype that can
represent the data without overflow, and is at least as wide as
the dtype parameter.

If dtype is unspecified, it is automatically determined.

Parameters
----------
dtype : numpy dtype specifier
A numpy dtype specifier specifying the narrowest acceptable
dtype.
dtype : numpy dtype specifier, optional
A numpy dtype specifier specifying the type of the returned array.

Returns
-------
array
Scaled of image data of data type `dtype`.
"""
data = self.__array__()
if dtype is None:
return data
final_type = np.promote_types(data.dtype, dtype)
return data.astype(final_type, copy=False)
Scaled image data with type `dtype`.
'''
# dtype=None is interpreted as float64
data = np.empty(self.shape)
frame_mapping = get_frame_order(self._subheader._mlist)
for i in sorted(frame_mapping):
data[:, :, :, i] = self._subheader.data_from_fileobj(
frame_mapping[i][0])
if dtype is not None:
data = data.astype(dtype, copy=False)
return data

def __getitem__(self, sliceobj):
""" Return slice `sliceobj` from ECAT data, optimizing if possible
Expand Down
35 changes: 11 additions & 24 deletions nibabel/minc1.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,42 +261,29 @@ def ndim(self):
def is_proxy(self):
return True

def _get_scaled(self, dtype, slicer):
data = self.minc_file.get_scaled_data(slicer)
if dtype is None:
return data
final_type = np.promote_types(data.dtype, dtype)
return data.astype(final_type, copy=False)

def get_scaled(self, dtype=None):
""" Read data from file and apply scaling
def __array__(self, dtype=None):
""" Read data from file and apply scaling, casting to ``dtype``

The dtype of the returned array is the narrowest dtype that can
represent the data without overflow, and is at least as wide as
the dtype parameter.

If dtype is unspecified, it is automatically determined.
If ``dtype`` is unspecified, the dtype is automatically determined.

Parameters
----------
dtype : numpy dtype specifier
A numpy dtype specifier specifying the narrowest acceptable
dtype.
dtype : numpy dtype specifier, optional
A numpy dtype specifier specifying the type of the returned array.

Returns
-------
array
Scaled image data of data type `dtype`.
Scaled image data with type `dtype`.
"""
return self._get_scaled(dtype=dtype, slicer=())

def __array__(self):
''' Read of data from file '''
return self._get_scaled(dtype=None, slicer=())
arr = self.minc_file.get_scaled_data(sliceobj=())
if dtype is not None:
arr = arr.astype(dtype, copy=False)
return arr

def __getitem__(self, sliceobj):
""" Read slice `sliceobj` of data from file """
return self._get_scaled(dtype=None, slicer=sliceobj)
return self.minc_file.get_scaled_data(sliceobj)


class MincHeader(SpatialHeader):
Expand Down
28 changes: 12 additions & 16 deletions nibabel/parrec.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,31 +676,27 @@ def get_unscaled(self):
"""
return self._get_unscaled(slicer=())

def get_scaled(self, dtype=None):
""" Read data from file and apply scaling
def __array__(self, dtype=None):
""" Read data from file and apply scaling, casting to ``dtype``

The dtype of the returned array is the narrowest dtype that can
represent the data without overflow, and is at least as wide as
the dtype parameter.

If dtype is unspecified, it is the wider of the dtypes of the slopes
or intercepts
If ``dtype`` is unspecified, the dtype of the returned array is the
narrowest dtype that can represent the data without overflow.
Generally, it is the wider of the dtypes of the slopes or intercepts.

Parameters
----------
dtype : numpy dtype specifier
A numpy dtype specifier specifying the narrowest acceptable
dtype.
dtype : numpy dtype specifier, optional
A numpy dtype specifier specifying the type of the returned array.

Returns
-------
array
Scaled image data of data type `dtype`.
Scaled image data with type `dtype`.
"""
return self._get_scaled(dtype=dtype, slicer=())

def __array__(self):
return self._get_scaled(dtype=None, slicer=())
arr = self._get_scaled(dtype=dtype, slicer=())
if dtype is not None:
arr = arr.astype(dtype, copy=False)
return arr

def __getitem__(self, slicer):
return self._get_scaled(dtype=None, slicer=slicer)
Expand Down
2 changes: 1 addition & 1 deletion nibabel/tests/test_api_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def meth(self):
for imaker, params in self.obj_params():
validator(self, imaker, params)
meth.__name__ = 'test_' + name[len('validate_'):]
meth.__doc__ = 'autogenerated test from ' + name
meth.__doc__ = 'autogenerated test from {}.{}'.format(klass.__name__, name)
return meth
for name in dir(klass):
if not name.startswith('validate_'):
Expand Down
38 changes: 26 additions & 12 deletions nibabel/tests/test_proxy_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@

from numpy.testing import assert_almost_equal, assert_array_equal, assert_allclose

from ..testing import data_path as DATA_PATH, assert_dt_equal
from ..testing import data_path as DATA_PATH, assert_dt_equal, clear_and_catch_warnings

from ..tmpdirs import InTemporaryDirectory

Expand Down Expand Up @@ -132,24 +132,38 @@ def validate_asarray(self, pmaker, params):
# Shape matches expected shape
assert_equal(out.shape, params['shape'])

def validate_get_scaled(self, pmaker, params):
def validate_array_interface_with_dtype(self, pmaker, params):
# Check proxy returns expected array from asarray
prox, fio, hdr = pmaker()
out = prox.get_scaled()
assert_array_equal(out, params['arr_out'])
assert_dt_equal(out.dtype, params['dtype_out'])
# Shape matches expected shape
assert_equal(out.shape, params['shape'])
orig = np.array(prox, dtype=None)
assert_array_equal(orig, params['arr_out'])
assert_dt_equal(orig.dtype, params['dtype_out'])

context = None
if np.issubdtype(orig.dtype, np.complexfloating):
context = clear_and_catch_warnings()
context.__enter__()
warnings.simplefilter('ignore', np.ComplexWarning)

for dtype in np.sctypes['float'] + np.sctypes['int'] + np.sctypes['uint']:
out = prox.get_scaled(dtype=dtype)
# Directly coerce with a dtype
direct = dtype(prox)
# Half-precision is imprecise. Obviously. It's a bad idea, but don't break
# the test over it.
rtol = 1e-03 if dtype == np.float16 else 1e-05
assert_allclose(out, params['arr_out'].astype(out.dtype), rtol=rtol, atol=1e-08)
assert_greater_equal(out.dtype, np.dtype(dtype))
# Shape matches expected shape
assert_equal(out.shape, params['shape'])
assert_allclose(direct, orig.astype(dtype), rtol=rtol, atol=1e-08)
assert_dt_equal(direct.dtype, np.dtype(dtype))
assert_equal(direct.shape, params['shape'])
# All three methods should produce equivalent results
for arrmethod in (np.array, np.asarray, np.asanyarray):
out = arrmethod(prox, dtype=dtype)
assert_array_equal(out, direct)
assert_dt_equal(out.dtype, np.dtype(dtype))
# Shape matches expected shape
assert_equal(out.shape, params['shape'])

if context is not None:
context.__exit__()

def validate_header_isolated(self, pmaker, params):
# Confirm altering input header has no effect
Expand Down