diff --git a/Changelog b/Changelog
index f9f4f00df5..165f1db94b 100644
--- a/Changelog
+++ b/Changelog
@@ -30,10 +30,13 @@ References like "pr/298" refer to github pull request numbers.
 
 New features
 ------------
-* ArrayProxy method ``get_scaled()`` scales data with a dtype of a
-  specified precision, promoting as necessary to avoid overflow. This
-  is to used in ``img.get_fdata()`` to control memory usage. (pr/833)
-  (CM, reviewed by Ross Markello)
+* ArrayProxy ``__array__()`` now accepts a ``dtype`` parameter, allowing
+  ``numpy.array(dataobj, dtype=...)`` calls, as well as casting directly
+  with a dtype (for example, ``numpy.float32(dataobj)``) to control the
+  output type. Scale factors (slope, intercept) are applied, but may be
+  cast to narrower types, to control memory usage. This is now the basis
+  of ``img.get_fdata()``, which will scale data in single precision if
+  the output type is ``float32``. (pr/844) (CM, reviewed by ...)
 * GiftiImage method ``agg_data()`` to return usable data arrays (pr/793)
   (Hao-Ting Wang, reviewed by CM)
 * Accept ``os.PathLike`` objects in place of filenames (pr/610) (Cameron
diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py
index b8313e8ae4..c4189e61e8 100644
--- a/nibabel/arrayproxy.py
+++ b/nibabel/arrayproxy.py
@@ -378,35 +378,31 @@ def get_unscaled(self):
         """
         return self._get_unscaled(slicer=())
 
-    def get_scaled(self, dtype=None):
-        """ Read data from file and apply scaling
+    def __array__(self, dtype=None):
+        """ Read data from file and apply scaling, casting to ``dtype``
 
-        The dtype of the returned array is the narrowest dtype that can
-        represent the data without overflow, and is at least as wide as
-        the dtype parameter.
+        If ``dtype`` is unspecified, the dtype of the returned array is the
+        narrowest dtype that can represent the data without overflow.
+        Generally, it is the wider of the dtypes of the slopes or intercepts.
 
-        If dtype is unspecified, it is the wider of the dtypes of the slope
-        or intercept. This will generally be determined by the parameter
-        size in the image header, and so should be consistent for a given
-        image format, but may vary across formats. Notably, these factors
-        are single-precision (32-bit) floats for NIfTI-1 and double-precision
-        (64-bit) floats for NIfTI-2.
+        The types of the scale factors will generally be determined by the
+        parameter size in the image header, and so should be consistent for a
+        given image format, but may vary across formats.
 
         Parameters
         ----------
-        dtype : numpy dtype specifier
-            A numpy dtype specifier specifying the narrowest acceptable
-            dtype.
+        dtype : numpy dtype specifier, optional
+            A numpy dtype specifier specifying the type of the returned array.
 
         Returns
        -------
         array
-            Scaled of image data of data type `dtype`.
+            Scaled image data with type `dtype`.
         """
-        return self._get_scaled(dtype=dtype, slicer=())
-
-    def __array__(self):
-        return self._get_scaled(dtype=None, slicer=())
+        arr = self._get_scaled(dtype=dtype, slicer=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, slicer):
         return self._get_scaled(dtype=None, slicer=slicer)
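Reviewer note: a minimal sketch of what this hunk enables, assuming any
scaled NIfTI-1 image on disk (the file name here is hypothetical)::

    import numpy as np
    import nibabel as nib

    img = nib.load('scaled.nii')
    # Default: dtype follows the scale factors (typically float32 for NIfTI-1)
    arr = np.asanyarray(img.dataobj)
    # Request an output type; the result is cast to float32
    arr32 = np.asanyarray(img.dataobj, dtype=np.float32)
    # Direct coercion with a dtype also routes through __array__
    f32 = np.float32(img.dataobj)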
""" - return self._get_scaled(dtype=dtype, slicer=()) - - def __array__(self): - return self._get_scaled(dtype=None, slicer=()) + arr = self._get_scaled(dtype=dtype, slicer=()) + if dtype is not None: + arr = arr.astype(dtype, copy=False) + return arr def __getitem__(self, slicer): return self._get_scaled(dtype=None, slicer=slicer) diff --git a/nibabel/dataobj_images.py b/nibabel/dataobj_images.py index dd4c853537..4b4d1b55d8 100644 --- a/nibabel/dataobj_images.py +++ b/nibabel/dataobj_images.py @@ -10,7 +10,6 @@ import numpy as np -from .arrayproxy import is_proxy from .filebasedimages import FileBasedImage from .keywordonly import kw_only_meth from .deprecated import deprecate_with_version @@ -351,14 +350,10 @@ def get_fdata(self, caching='fill', dtype=np.float64): if self._fdata_cache is not None: if self._fdata_cache.dtype.type == dtype.type: return self._fdata_cache - dataobj = self._dataobj - # Attempt to confine data array to dtype during scaling - # On overflow, may still upcast - if is_proxy(dataobj): - dataobj = dataobj.get_scaled(dtype=dtype) # Always return requested data type - # For array proxies, will only copy on overflow - data = np.asanyarray(dataobj, dtype=dtype) + # For array proxies, will attempt to confine data array to dtype + # during scaling + data = np.asanyarray(self._dataobj, dtype=dtype) if caching == 'fill': self._fdata_cache = data return data diff --git a/nibabel/ecat.py b/nibabel/ecat.py index a0923f0753..1814b9147c 100644 --- a/nibabel/ecat.py +++ b/nibabel/ecat.py @@ -689,46 +689,33 @@ def ndim(self): def is_proxy(self): return True - def __array__(self): + def __array__(self, dtype=None): ''' Read of data from file This reads ALL FRAMES into one array, can be memory expensive. If you want to read only some slices, use the slicing syntax (``__getitem__``) below, or ``subheader.data_from_fileobj(frame)`` - ''' - data = np.empty(self.shape) - frame_mapping = get_frame_order(self._subheader._mlist) - for i in sorted(frame_mapping): - data[:, :, :, i] = self._subheader.data_from_fileobj( - frame_mapping[i][0]) - return data - - def get_scaled(self, dtype=None): - """ Read data from file and apply scaling - - The dtype of the returned array is the narrowest dtype that can - represent the data without overflow, and is at least as wide as - the dtype parameter. - - If dtype is unspecified, it is automatically determined. Parameters ---------- - dtype : numpy dtype specifier - A numpy dtype specifier specifying the narrowest acceptable - dtype. + dtype : numpy dtype specifier, optional + A numpy dtype specifier specifying the type of the returned array. Returns ------- array - Scaled of image data of data type `dtype`. - """ - data = self.__array__() - if dtype is None: - return data - final_type = np.promote_types(data.dtype, dtype) - return data.astype(final_type, copy=False) + Scaled image data with type `dtype`. 
diff --git a/nibabel/minc1.py b/nibabel/minc1.py
index c41af5c454..6137d11a65 100644
--- a/nibabel/minc1.py
+++ b/nibabel/minc1.py
@@ -261,42 +261,29 @@ def ndim(self):
     def is_proxy(self):
         return True
 
-    def _get_scaled(self, dtype, slicer):
-        data = self.minc_file.get_scaled_data(slicer)
-        if dtype is None:
-            return data
-        final_type = np.promote_types(data.dtype, dtype)
-        return data.astype(final_type, copy=False)
-
-    def get_scaled(self, dtype=None):
-        """ Read data from file and apply scaling
+    def __array__(self, dtype=None):
+        """ Read data from file and apply scaling, casting to ``dtype``
 
-        The dtype of the returned array is the narrowest dtype that can
-        represent the data without overflow, and is at least as wide as
-        the dtype parameter.
-
-        If dtype is unspecified, it is automatically determined.
+        If ``dtype`` is unspecified, the dtype is automatically determined.
 
         Parameters
        ----------
-        dtype : numpy dtype specifier
-            A numpy dtype specifier specifying the narrowest acceptable
-            dtype.
+        dtype : numpy dtype specifier, optional
+            A numpy dtype specifier specifying the type of the returned array.
 
         Returns
         -------
         array
-            Scaled of image data of data type `dtype`.
+            Scaled image data with type `dtype`.
         """
-        return self._get_scaled(dtype=dtype, slicer=())
-
-    def __array__(self):
-        ''' Read of data from file '''
-        return self._get_scaled(dtype=None, slicer=())
+        arr = self.minc_file.get_scaled_data(sliceobj=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, sliceobj):
         """ Read slice `sliceobj` of data from file """
-        return self._get_scaled(dtype=None, slicer=sliceobj)
+        return self.minc_file.get_scaled_data(sliceobj)
 
 
 class MincHeader(SpatialHeader):
diff --git a/nibabel/parrec.py b/nibabel/parrec.py
index 59eac79809..431e043d12 100644
--- a/nibabel/parrec.py
+++ b/nibabel/parrec.py
@@ -676,31 +676,27 @@ def get_unscaled(self):
         """
         return self._get_unscaled(slicer=())
 
-    def get_scaled(self, dtype=None):
-        """ Read data from file and apply scaling
+    def __array__(self, dtype=None):
+        """ Read data from file and apply scaling, casting to ``dtype``
 
-        The dtype of the returned array is the narrowest dtype that can
-        represent the data without overflow, and is at least as wide as
-        the dtype parameter.
-
-        If dtype is unspecified, it is the wider of the dtypes of the slopes
-        or intercepts
+        If ``dtype`` is unspecified, the dtype of the returned array is the
+        narrowest dtype that can represent the data without overflow.
+        Generally, it is the wider of the dtypes of the slopes or intercepts.
 
         Parameters
         ----------
-        dtype : numpy dtype specifier
-            A numpy dtype specifier specifying the narrowest acceptable
-            dtype.
+        dtype : numpy dtype specifier, optional
+            A numpy dtype specifier specifying the type of the returned array.
 
         Returns
         -------
         array
-            Scaled of image data of data type `dtype`.
+            Scaled image data with type `dtype`.
         """
-        return self._get_scaled(dtype=dtype, slicer=())
-
-    def __array__(self):
-        return self._get_scaled(dtype=None, slicer=())
+        arr = self._get_scaled(dtype=dtype, slicer=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, slicer):
         return self._get_scaled(dtype=None, slicer=slicer)
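Reviewer note: with MINC and PAR/REC converted as well, the user-visible
effect is uniform across formats: ``img.get_fdata(dtype=np.float32)`` now
passes the requested type through ``np.asanyarray`` to the proxy's
``__array__``. A sketch with a hypothetical PAR/REC file name::

    import numpy as np
    import nibabel as nib

    img = nib.load('example.PAR')
    data = img.get_fdata(dtype=np.float32)
    assert data.dtype == np.float32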
""" - return self._get_scaled(dtype=dtype, slicer=()) - - def __array__(self): - return self._get_scaled(dtype=None, slicer=()) + arr = self._get_scaled(dtype=dtype, slicer=()) + if dtype is not None: + arr = arr.astype(dtype, copy=False) + return arr def __getitem__(self, slicer): return self._get_scaled(dtype=None, slicer=slicer) diff --git a/nibabel/tests/test_api_validators.py b/nibabel/tests/test_api_validators.py index a7cbb8b555..41d3f41110 100644 --- a/nibabel/tests/test_api_validators.py +++ b/nibabel/tests/test_api_validators.py @@ -19,7 +19,7 @@ def meth(self): for imaker, params in self.obj_params(): validator(self, imaker, params) meth.__name__ = 'test_' + name[len('validate_'):] - meth.__doc__ = 'autogenerated test from ' + name + meth.__doc__ = 'autogenerated test from {}.{}'.format(klass.__name__, name) return meth for name in dir(klass): if not name.startswith('validate_'): diff --git a/nibabel/tests/test_proxy_api.py b/nibabel/tests/test_proxy_api.py index 1694254a0c..48a024795d 100644 --- a/nibabel/tests/test_proxy_api.py +++ b/nibabel/tests/test_proxy_api.py @@ -57,7 +57,7 @@ from numpy.testing import assert_almost_equal, assert_array_equal, assert_allclose -from ..testing import data_path as DATA_PATH, assert_dt_equal +from ..testing import data_path as DATA_PATH, assert_dt_equal, clear_and_catch_warnings from ..tmpdirs import InTemporaryDirectory @@ -132,24 +132,38 @@ def validate_asarray(self, pmaker, params): # Shape matches expected shape assert_equal(out.shape, params['shape']) - def validate_get_scaled(self, pmaker, params): + def validate_array_interface_with_dtype(self, pmaker, params): # Check proxy returns expected array from asarray prox, fio, hdr = pmaker() - out = prox.get_scaled() - assert_array_equal(out, params['arr_out']) - assert_dt_equal(out.dtype, params['dtype_out']) - # Shape matches expected shape - assert_equal(out.shape, params['shape']) + orig = np.array(prox, dtype=None) + assert_array_equal(orig, params['arr_out']) + assert_dt_equal(orig.dtype, params['dtype_out']) + + context = None + if np.issubdtype(orig.dtype, np.complexfloating): + context = clear_and_catch_warnings() + context.__enter__() + warnings.simplefilter('ignore', np.ComplexWarning) for dtype in np.sctypes['float'] + np.sctypes['int'] + np.sctypes['uint']: - out = prox.get_scaled(dtype=dtype) + # Directly coerce with a dtype + direct = dtype(prox) # Half-precision is imprecise. Obviously. It's a bad idea, but don't break # the test over it. rtol = 1e-03 if dtype == np.float16 else 1e-05 - assert_allclose(out, params['arr_out'].astype(out.dtype), rtol=rtol, atol=1e-08) - assert_greater_equal(out.dtype, np.dtype(dtype)) - # Shape matches expected shape - assert_equal(out.shape, params['shape']) + assert_allclose(direct, orig.astype(dtype), rtol=rtol, atol=1e-08) + assert_dt_equal(direct.dtype, np.dtype(dtype)) + assert_equal(direct.shape, params['shape']) + # All three methods should produce equivalent results + for arrmethod in (np.array, np.asarray, np.asanyarray): + out = arrmethod(prox, dtype=dtype) + assert_array_equal(out, direct) + assert_dt_equal(out.dtype, np.dtype(dtype)) + # Shape matches expected shape + assert_equal(out.shape, params['shape']) + + if context is not None: + context.__exit__() def validate_header_isolated(self, pmaker, params): # Confirm altering input header has no effect