diff --git a/nibabel/dataobj_images.py b/nibabel/dataobj_images.py index 4eb2bcce96..b88f02dd21 100644 --- a/nibabel/dataobj_images.py +++ b/nibabel/dataobj_images.py @@ -41,6 +41,7 @@ def __init__(self, dataobj, header=None, extra=None, file_map=None): super(DataobjImage, self).__init__(header=header, extra=extra, file_map=file_map) self._dataobj = dataobj + self._fdata_cache = None self._data_cache = None @property @@ -55,7 +56,19 @@ def _data(self): return self._dataobj def get_data(self, caching='fill'): - """ Return image data from image with any necessary scalng applied + """ Return image data from image with any necessary scaling applied + + .. WARNING:: + + We recommend you use the ``get_fdata`` method instead of the + ``get_data`` method, because it is easier to predict the return + data type. We will deprecate the ``get_data`` method around April + 2018, and remove it around April 2020. + + If you don't care about the predictability of the return data type, + and you want the minimum possible data size in memory, you can + replicate the array that would be returned by ``img.get_data()`` by + using ``np.asanyarray(img.dataobj)``. The image ``dataobj`` property can be an array proxy or an array. An array proxy is an object that knows how to load the image data from @@ -125,7 +138,7 @@ def get_data(self, caching='fill'): (no reference to the array). If the cache is full, "unchanged" leaves the cache full and returns the cached array reference. - The cache can effect the behavior of the image, because if the cache is + The cache can affect the behavior of the image, because if the cache is full, or you have an array image, then modifying the returned array will modify the result of future calls to ``get_data()``. 
For example you might do this: @@ -191,11 +204,160 @@ def get_data(self, caching='fill'): self._data_cache = data return data + def get_fdata(self, caching='fill', dtype=np.float64): + """ Return floating point image data with necessary scaling applied + + The image ``dataobj`` property can be an array proxy or an array. An + array proxy is an object that knows how to load the image data from + disk. An image with an array proxy ``dataobj`` is a *proxy image*; an + image with an array in ``dataobj`` is an *array image*. + + The default behavior for ``get_fdata()`` on a proxy image is to read + the data from the proxy, and store in an internal cache. Future calls + to ``get_fdata`` will return the cached array. This is the behavior + selected with `caching` == "fill". + + Once the data has been cached and returned from an array proxy, if you + modify the returned array, you will also modify the cached array + (because they are the same array). Regardless of the `caching` flag, + this is always true of an array image. + + Parameters + ---------- + caching : {'fill', 'unchanged'}, optional + See the Notes section for a detailed explanation. This argument + specifies whether the image object should fill in an internal + cached reference to the returned image data array. "fill" specifies + that the image should fill an internal cached reference if + currently empty. Future calls to ``get_fdata`` will return this + cached reference. You might prefer "fill" to save the image object + from having to reload the array data from disk on each call to + ``get_fdata``. "unchanged" means that the image should not fill in + the internal cached reference if the cache is currently empty. You + might prefer "unchanged" to "fill" if you want to make sure that + the call to ``get_fdata`` does not create an extra (cached) + reference to the returned array. In this case it is easier for + Python to free the memory from the returned array. 
+ dtype : numpy dtype specifier + A numpy dtype specifier specifying a floating point type. Data is + returned as this floating point type. Default is ``np.float64``. + + Returns + ------- + fdata : array + Array of image data of data type `dtype`. + + See also + -------- + uncache: empty the array data cache + + Notes + ----- + All images have a property ``dataobj`` that represents the image array + data. Images that have been loaded from files usually do not load the + array data from file immediately, in order to reduce image load time + and memory use. For these images, ``dataobj`` is an *array proxy*; an + object that knows how to load the image array data from file. + + By default (`caching` == "fill"), when you call ``get_fdata`` on a + proxy image, we load the array data from disk, store (cache) an + internal reference to this array data, and return the array. The next + time you call ``get_fdata``, you will get the cached reference to the + array, so we don't have to load the array data from disk again. + + Array images have a ``dataobj`` property that already refers to an + array in memory, so there is no benefit to caching, and the `caching` + keywords have no effect. + + For proxy images, you may not want to fill the cache after reading the + data from disk because the cache will hold onto the array memory until + the image object is deleted, or you use the image ``uncache`` method. + If you don't want to fill the cache, then always use + ``get_fdata(caching='unchanged')``; in this case ``get_fdata`` will not + fill the cache (store the reference to the array) if the cache is empty + (no reference to the array). If the cache is full, "unchanged" leaves + the cache full and returns the cached array reference. + + The cache can affect the behavior of the image, because if the cache is + full, or you have an array image, then modifying the returned array + will modify the result of future calls to ``get_fdata()``.
For example + you might do this: + + >>> import os + >>> import nibabel as nib + >>> from nibabel.testing import data_path + >>> img_fname = os.path.join(data_path, 'example4d.nii.gz') + + >>> img = nib.load(img_fname) # This is a proxy image + >>> nib.is_proxy(img.dataobj) + True + + The array is not yet cached by a call to "get_fdata", so: + + >>> img.in_memory + False + + After we call ``get_fdata`` using the default `caching` == 'fill', the + cache contains a reference to the returned array ``data``: + + >>> data = img.get_fdata() + >>> img.in_memory + True + + We modify an element in the returned data array: + + >>> data[0, 0, 0, 0] + 0.0 + >>> data[0, 0, 0, 0] = 99 + >>> data[0, 0, 0, 0] + 99.0 + + The next time we call 'get_fdata', the method returns the cached + reference to the (modified) array: + + >>> data_again = img.get_fdata() + >>> data_again is data + True + >>> data_again[0, 0, 0, 0] + 99.0 + + If you had *initially* used `caching` == 'unchanged' then the returned + ``data`` array would have been loaded from file, but not cached, and: + + >>> img = nib.load(img_fname) # a proxy image again + >>> data = img.get_fdata(caching='unchanged') + >>> img.in_memory + False + >>> data[0, 0, 0] = 99 + >>> data_again = img.get_fdata(caching='unchanged') + >>> data_again is data + False + >>> data_again[0, 0, 0, 0] + 0.0 + """ + if caching not in ('fill', 'unchanged'): + raise ValueError('caching value should be "fill" or "unchanged"') + dtype = np.dtype(dtype) + if not issubclass(dtype.type, np.inexact): + raise ValueError('{} should be floating point type'.format(dtype)) + # Return cache if cache present and of correct dtype. 
+ if self._fdata_cache is not None: + if self._fdata_cache.dtype.type == dtype.type: + return self._fdata_cache + data = np.asanyarray(self._dataobj).astype(dtype) + if caching == 'fill': + self._fdata_cache = data + return data + @property def in_memory(self): - """ True when array data is in memory + """ True when any array data is in memory cache + + There are separate caches for `get_data` reads and `get_fdata` reads. + This property is True if either of those caches is set. """ return (isinstance(self._dataobj, np.ndarray) or + self._fdata_cache is not None or self._data_cache is not None) def uncache(self): @@ -206,23 +368,24 @@ def uncache(self): * *array images* where the data ``img.dataobj`` is an array * *proxy images* where the data ``img.dataobj`` is a proxy object - If you call ``img.get_data()`` on a proxy image, the result of reading + If you call ``img.get_fdata()`` on a proxy image, the result of reading from the proxy gets cached inside the image object, and this cache is - what gets returned from the next call to ``img.get_data()``. If you + what gets returned from the next call to ``img.get_fdata()``. If you modify the returned data, as in:: - data = img.get_data() + data = img.get_fdata() data[:] = 42 - then the next call to ``img.get_data()`` returns the modified array, + then the next call to ``img.get_fdata()`` returns the modified array, whether the image is an array image or a proxy image:: - assert np.all(img.get_data() == 42) + assert np.all(img.get_fdata() == 42) When you uncache an array image, this has no effect on the return of - ``img.get_data()``, but when you uncache a proxy image, the result of - ``img.get_data()`` returns to its original value. - ``img.get_fdata()``, but when you uncache a proxy image, the result of + ``img.get_fdata()`` returns to its original value.
""" + self._fdata_cache = None self._data_cache = None @property diff --git a/nibabel/tests/test_filebasedimages.py b/nibabel/tests/test_filebasedimages.py index 469c60d803..9a6f8b3db7 100644 --- a/nibabel/tests/test_filebasedimages.py +++ b/nibabel/tests/test_filebasedimages.py @@ -29,6 +29,9 @@ def shape(self): def get_data(self): return self.arr + def get_fdata(self): + return self.arr.astype(np.float64) + @classmethod def from_file_map(klass, file_map): with file_map['image'].get_prepare_fileobj('rb') as fobj: diff --git a/nibabel/tests/test_image_api.py b/nibabel/tests/test_image_api.py index c2f177ff79..d71fbcdb30 100644 --- a/nibabel/tests/test_image_api.py +++ b/nibabel/tests/test_image_api.py @@ -12,10 +12,13 @@ * ``img.affine`` (4x4 float ``np.ndarray`` relating spatial voxel coordinates to world space) * ``img.shape`` (shape of data as read with ``np.array(img.dataobj)`` -* ``img.get_data()`` (returns data as read with ``np.array(img.dataobj)``) -* ``img.uncache()`` (``img.get_data()`` is allowed to cache the result of the - array creation. If it does, this call empties that cache. Implement this - as a no-op if ``get_data()`` does not cache. +* ``img.get_fdata()`` (returns floating point data as read with + ``np.array(img.dataobj)`` and then cast to float); +* ``img.get_data()`` (returns data as read with ``np.array(img.dataobj)``); +* ``img.uncache()`` (``img.get_fdata()`` and ``img.get_data()`` are allowed to + cache the result of the array creation. If they do, this call empties that + cache. Implement this as a no-op if ``get_fdata()``, ``get_data()`` do not + cache); * ``img[something]`` generates an informative TypeError * ``img.in_memory`` is True for an array image, and for a proxy image that is cached, but False otherwise. @@ -34,13 +37,12 @@ from ..
import (AnalyzeImage, Spm99AnalyzeImage, Spm2AnalyzeImage, Nifti1Pair, Nifti1Image, Nifti2Pair, Nifti2Image, - MGHImage, Minc1Image, Minc2Image) + MGHImage, Minc1Image, Minc2Image, is_proxy) from ..spatialimages import SpatialImage from .. import minc1, minc2, parrec from nose import SkipTest -from nose.tools import (assert_true, assert_false, assert_raises, - assert_equal, assert_not_equal) +from nose.tools import (assert_true, assert_false, assert_raises, assert_equal) from numpy.testing import (assert_almost_equal, assert_array_equal) from ..testing import clear_and_catch_warnings @@ -86,7 +88,7 @@ def obj_params(self): Expected properties of image returned from ``img_creator`` callable. Key, value pairs should include: - * ``data`` : array returned from ``get_data()`` on image - OR - + * ``data`` : array returned from ``get_fdata()`` on image - OR - ``data_summary`` : dict with data ``min``, ``max``, ``mean``; * ``shape`` : shape of image; * ``affine`` : shape (4, 4) affine array for image; @@ -128,6 +130,8 @@ def validate_filenames(self, imaker, params): # The bytesio_round_trip helper tests bytesio load / save via file_map rt_img = bytesio_round_trip(img) assert_array_equal(img.shape, rt_img.shape) + assert_almost_equal(img.get_fdata(), rt_img.get_fdata()) + # get_data will be deprecated assert_almost_equal(img.get_data(), rt_img.get_data()) # Give the image a file map klass = type(img) @@ -135,6 +139,8 @@ def validate_filenames(self, imaker, params): # This object can now be saved and loaded from its own file_map rt_img.to_file_map() rt_rt_img = klass.from_file_map(rt_img.file_map) + assert_almost_equal(img.get_fdata(), rt_rt_img.get_fdata()) + # get_data will be deprecated assert_almost_equal(img.get_data(), rt_rt_img.get_data()) # get_ / set_ filename fname = 'an_image' + self.standard_extension @@ -147,6 +153,8 @@ def validate_filenames(self, imaker, params): img.to_filename(fname) rt_img = img.__class__.from_filename(fname) assert_array_equal(img.shape, 
rt_img.shape) + assert_almost_equal(img.get_fdata(), rt_img.get_fdata()) + # get_data will be deprecated assert_almost_equal(img.get_data(), rt_img.get_data()) del rt_img # to allow windows to delete the directory @@ -192,72 +200,158 @@ def validate_data_interface(self, imaker, params): img = imaker() assert_equal(img.shape, img.dataobj.shape) assert_data_similar(img.dataobj, params) - if params['is_proxy']: - assert_false(isinstance(img.dataobj, np.ndarray)) - proxy_data = np.asarray(img.dataobj) - proxy_copy = proxy_data.copy() - # Not yet cached, proxy image: in_memory is False - assert_false(img.in_memory) - # Load with caching='unchanged' - data = img.get_data(caching='unchanged') - # Still not cached - assert_false(img.in_memory) - # Default load, does caching - data = img.get_data() - # Data now cached - assert_true(img.in_memory) - assert_false(proxy_data is data) - # Now caching='unchanged' does nothing, returns cached version - data_again = img.get_data(caching='unchanged') - assert_true(data is data_again) - # caching='fill' does nothing because the cache is already full - data_yet_again = img.get_data(caching='fill') - assert_true(data is data_yet_again) - # changing array data does not change proxy data, or reloaded data - data[:] = 42 - assert_array_equal(proxy_data, proxy_copy) - assert_array_equal(np.asarray(img.dataobj), proxy_copy) - # It does change the result of get_data - assert_array_equal(img.get_data(), 42) - # until we uncache - img.uncache() - # Which unsets in_memory - assert_false(img.in_memory) - assert_array_equal(img.get_data(), proxy_copy) - # Check caching='fill' does cache data - img = imaker() - assert_false(img.in_memory) - data = img.get_data(caching='fill') - assert_true(img.in_memory) - data_again = img.get_data() - assert_true(data is data_again) - else: # not proxy - for caching in (None, 'fill', 'unchanged'): + meth_names = ('get_fdata', 'get_data') + for meth_name in meth_names: + if params['is_proxy']: + # Parameters 
assert this is an array proxy img = imaker() - get_data_func = (img.get_data if caching is None else - partial(img.get_data, caching=caching)) - assert_true(isinstance(img.dataobj, np.ndarray)) + # Does is_proxy agree? + assert_true(is_proxy(img.dataobj)) + # Confirm it is not a numpy array + assert_false(isinstance(img.dataobj, np.ndarray)) + # Confirm it can be converted to a numpy array with asarray + proxy_data = np.asarray(img.dataobj) + proxy_copy = proxy_data.copy() + # Not yet cached, proxy image: in_memory is False + assert_false(img.in_memory) + # Load with caching='unchanged' + method = getattr(img, meth_name) + data = method(caching='unchanged') + # Still not cached + assert_false(img.in_memory) + # Default load, does caching + data = method() + # Data now cached. in_memory is True if either of the get_data + # or get_fdata caches are not-None assert_true(img.in_memory) - data = get_data_func() - assert_true(data is img.dataobj) - # changing array data does change proxy data, and reloaded data + # We previously got proxy_data from disk, but data, which we + # have just fetched, is a fresh copy. + assert_false(proxy_data is data) + # asarray on dataobj, applied above, returns same numerical + # values. This might not be true for get_fdata operating on huge + # integers, but let's assume that's not the case here.
+ assert_array_equal(proxy_data, data) + # Now caching='unchanged' does nothing, returns cached version + data_again = method(caching='unchanged') + assert_true(data is data_again) + # caching='fill' does nothing because the cache is already full + data_yet_again = method(caching='fill') + assert_true(data is data_yet_again) + # changing array data does not change proxy data, or reloaded + # data data[:] = 42 - assert_array_equal(np.asarray(img.dataobj), 42) + assert_array_equal(proxy_data, proxy_copy) + assert_array_equal(np.asarray(img.dataobj), proxy_copy) # It does change the result of get_data - assert_array_equal(get_data_func(), 42) - # Unache has no effect + assert_array_equal(method(), 42) + # until we uncache img.uncache() - assert_array_equal(get_data_func(), 42) + # Which unsets in_memory + assert_false(img.in_memory) + assert_array_equal(method(), proxy_copy) + # Check caching='fill' does cache data + img = imaker() + method = getattr(img, meth_name) + assert_false(img.in_memory) + data = method(caching='fill') assert_true(img.in_memory) - # Data shape is same as image shape - assert_equal(img.shape, img.get_data().shape) + data_again = method() + assert_true(data is data_again) + # Check the interaction of caching with get_data, get_fdata. + # Caching for `get_data` should have no effect on caching for + # get_fdata, and vice versa. + # Modify the cached data + data[:] = 43 + # Load using the other data fetch method + other_name = set(meth_names).difference({meth_name}).pop() + other_method = getattr(img, other_name) + other_data = other_method() + # We get the original data, not the modified cache + assert_array_equal(proxy_data, other_data) + assert_false(np.all(data == other_data)) + # We can modify the other cache, without affecting the first + other_data[:] = 44 + assert_array_equal(other_method(), 44) + assert_false(np.all(method() == other_method())) + # Check that caching refreshes for new floating point type. 
+ if meth_name == 'get_fdata': + img.uncache() + fdata = img.get_fdata() + assert_equal(fdata.dtype, np.float64) + fdata[:] = 42 + fdata_back = img.get_fdata() + assert_array_equal(fdata_back, 42) + assert_equal(fdata_back.dtype, np.float64) + # New data dtype, no caching, doesn't use or alter cache + fdata_new_dt = img.get_fdata(caching='unchanged', dtype='f4') + # We get back the original read, not the modified cache + assert_array_equal(fdata_new_dt, proxy_data.astype('f4')) + assert_equal(fdata_new_dt.dtype, np.float32) + # The original cache stays in place, for default float64 + assert_array_equal(img.get_fdata(), 42) + # And for not-default float32, because we haven't cached + fdata_new_dt[:] = 43 + fdata_new_dt = img.get_fdata(caching='unchanged', dtype='f4') + assert_array_equal(fdata_new_dt, proxy_data.astype('f4')) + # Until we reset with caching='fill', at which point we + # drop the original float64 cache, and have a float32 cache + fdata_new_dt = img.get_fdata(caching='fill', dtype='f4') + assert_array_equal(fdata_new_dt, proxy_data.astype('f4')) + # We're using the cache, for dtype='f4' reads + fdata_new_dt[:] = 43 + assert_array_equal(img.get_fdata(dtype='f4'), 43) + # We've lost the cache for float64 reads (no longer 42) + assert_array_equal(img.get_fdata(), proxy_data) + else: # not proxy + for caching in (None, 'fill', 'unchanged'): + img = imaker() + method = getattr(img, meth_name) + get_data_func = (method if caching is None else + partial(method, caching=caching)) + assert_true(isinstance(img.dataobj, np.ndarray)) + assert_true(img.in_memory) + data = get_data_func() + # Returned data same object as underlying dataobj if using + # old ``get_data`` method, or using newer ``get_fdata`` + # method, where original array was float64. + dataobj_is_data = (img.dataobj.dtype == np.float64 + or method == img.get_data) + # Set something to the output array. 
+ data[:] = 42 + get_result_changed = np.all(get_data_func() == 42) + assert_equal(get_result_changed, + dataobj_is_data or caching != 'unchanged') + if dataobj_is_data: + assert_true(data is img.dataobj) + # Changing array data changes + # data + assert_array_equal(np.asarray(img.dataobj), 42) + # Uncache has no effect + img.uncache() + assert_array_equal(get_data_func(), 42) + else: + assert_false(data is img.dataobj) + assert_false(np.all(np.asarray(img.dataobj) == 42)) + # Uncache does have an effect + img.uncache() + assert_false(np.all(get_data_func() == 42)) + # in_memory is always true for array images, regardless of + # cache state. + img.uncache() + assert_true(img.in_memory) + # Values to get_data caching parameter must be 'fill' or + # 'unchanged' + assert_raises(ValueError, img.get_data, caching='something') + # Data shape is same as image shape + assert_equal(img.shape, method().shape) + # Values to get_data caching parameter must be 'fill' or + # 'unchanged' + assert_raises(ValueError, img.get_data, caching='something') # dataobj is read only fake_data = np.zeros(img.shape).astype(img.get_data_dtype()) assert_raises(AttributeError, setattr, img, 'dataobj', fake_data) # So is in_memory assert_raises(AttributeError, setattr, img, 'in_memory', False) - # Values to get_data caching parameter must be 'fill' or 'unchanged' - assert_raises(ValueError, img.get_data, caching='something') def validate_data_deprecated(self, imaker, params): # Check _data property still exists, but raises warning diff --git a/nibabel/tests/test_spatialimages.py b/nibabel/tests/test_spatialimages.py index 032450c6b9..1033276bd4 100644 --- a/nibabel/tests/test_spatialimages.py +++ b/nibabel/tests/test_spatialimages.py @@ -306,6 +306,62 @@ def test_get_shape(self): img = img_klass(np.zeros((2, 3, 4), np.int16), np.eye(4)) assert_equal(img.get_shape(), (2, 3, 4)) + def test_get_fdata(self): + # Test array image and proxy image interface for floating point data + img_klass = 
self.image_class + in_data_template = np.arange(24, dtype=np.int16).reshape((2, 3, 4)) + in_data = in_data_template.copy() + img = img_klass(in_data, None) + assert_true(in_data is img.dataobj) + # The get_fdata method changes the array to floating point type + assert_equal(img.get_fdata(dtype='f4').dtype, np.dtype(np.float32)) + fdata_32 = img.get_fdata(dtype=np.float32) + assert_equal(fdata_32.dtype, np.dtype(np.float32)) + # Caching is specific to data dtype. If we reload with default data + # type, the cache gets reset + fdata_32[:] = 99 + # Cache has been modified, we pick up the modifications, but only for + # the cached data type + assert_array_equal(img.get_fdata(dtype='f4'), 99) + fdata_64 = img.get_fdata() + assert_equal(fdata_64.dtype, np.dtype(np.float64)) + assert_array_equal(fdata_64, in_data) + fdata_64[:] = 101 + assert_array_equal(img.get_fdata(dtype='f8'), 101) + assert_array_equal(img.get_fdata(), 101) + # Reloading with new data type blew away the float32 cache + assert_array_equal(img.get_fdata(dtype='f4'), in_data) + img.uncache() + # Now recaching, is float64 + out_data = img.get_fdata() + assert_equal(out_data.dtype, np.dtype(np.float64)) + # Input dtype needs to be floating point + assert_raises(ValueError, img.get_fdata, dtype=np.int16) + assert_raises(ValueError, img.get_fdata, dtype=np.int32) + # The cache is filled + out_data[:] = 42 + assert_true(img.get_fdata() is out_data) + img.uncache() + assert_false(img.get_fdata() is out_data) + # The 42 has gone now. 
+ assert_array_equal(img.get_fdata(), in_data_template) + # If we can save, we can create a proxy image + if not self.can_save: + return + rt_img = bytesio_round_trip(img) + assert_false(in_data is rt_img.dataobj) + assert_array_equal(rt_img.dataobj, in_data) + out_data = rt_img.get_fdata() + assert_array_equal(out_data, in_data) + assert_false(rt_img.dataobj is out_data) + assert_equal(out_data.dtype, np.dtype(np.float64)) + # cache + assert_true(rt_img.get_fdata() is out_data) + out_data[:] = 42 + rt_img.uncache() + assert_false(rt_img.get_fdata() is out_data) + assert_array_equal(rt_img.get_fdata(), in_data) + def test_get_data(self): # Test array image and proxy image interface img_klass = self.image_class