diff --git a/.travis.yml b/.travis.yml index 2c6ad370f26..9a39105a96d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ matrix: - python: 2.7 env: CONDA_ENV=py27-min - python: 2.7 - env: CONDA_ENV=py27-cdat+pynio + env: CONDA_ENV=py27-cdat+iris+pynio - python: 3.4 env: CONDA_ENV=py34 - python: 3.5 diff --git a/ci/requirements-py27-cdat+pynio.yml b/ci/requirements-py27-cdat+iris+pynio.yml similarity index 95% rename from ci/requirements-py27-cdat+pynio.yml rename to ci/requirements-py27-cdat+iris+pynio.yml index 8bf4fe601e2..6c7e3b87318 100644 --- a/ci/requirements-py27-cdat+pynio.yml +++ b/ci/requirements-py27-cdat+iris+pynio.yml @@ -22,6 +22,7 @@ dependencies: - seaborn - toolz - rasterio + - iris>=1.10 - zarr - pip: - coveralls diff --git a/doc/api.rst b/doc/api.rst index 60b0aa47e1f..caed7cced76 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -448,6 +448,8 @@ DataArray methods DataArray.to_index DataArray.to_masked_array DataArray.to_cdms2 + DataArray.to_iris + DataArray.from_iris DataArray.to_dict DataArray.from_series DataArray.from_cdms2 diff --git a/doc/environment.yml b/doc/environment.yml index 45fa6417e16..b14fba351c1 100644 --- a/doc/environment.yml +++ b/doc/environment.yml @@ -17,3 +17,4 @@ dependencies: - rasterio=0.36.0 - sphinx-gallery - zarr + - iris diff --git a/doc/io.rst b/doc/io.rst index 14e82d4aacc..eac941b336b 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -338,6 +338,38 @@ supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian' 'nol By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time difference between values, with a reference time of the first time value. +.. _io.iris: + +Iris +---- + +The Iris_ tool allows easy reading of common meteorological and climate model formats +(including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very +similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is +installed xarray can convert a ``DataArray`` into a ``Cube`` using +:py:meth:`~xarray.DataArray.to_iris`: + +.. ipython:: python + + da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], + coords=dict(x=[10, 20, 30, 40], + y=pd.date_range('2000-01-01', periods=5))) + + cube = da.to_iris() + cube + +Conversely, we can create a new ``DataArray`` object from a ``Cube`` using +:py:meth:`~xarray.DataArray.from_iris`: + +.. ipython:: python + + da_cube = xr.DataArray.from_iris(cube) + da_cube + + +.. _Iris: http://scitools.org.uk/iris + + OPeNDAP ------- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4149432b117..ee05469c5af 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Enhancements 1D co-ordinate (e.g. time) and a 2D co-ordinate (e.g. depth as a function of time) (:issue:`1737`). By `Deepak Cherian `_. +- Added :py:meth:`DataArray.to_iris ` and :py:meth:`DataArray.from_iris ` for + converting data arrays to and from Iris_ Cubes with the same data and coordinates (:issue:`621` and :issue:`37`). + By `Neil Parley `_ and `Duncan Watson-Parris `_. - Use ``pandas.Grouper`` class in xarray resample methods rather than the deprecated ``pandas.TimeGrouper`` class (:issue:`1766`). By `Joe Hamman `_. @@ -36,6 +39,7 @@ Enhancements .. _Zarr: http://zarr.readthedocs.io/ +.. _Iris: http://scitools.org.uk/iris Bug fixes ~~~~~~~~~ diff --git a/xarray/convert.py b/xarray/convert.py index 26822ea97c8..446bd5a0d35 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -7,24 +7,42 @@ import numpy as np from .core.dataarray import DataArray +from .core.pycompat import OrderedDict, range +from .core.dtypes import get_fill_value from .conventions import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) -ignored_attrs = set(['name', 'tileIndex']) +cdms2_ignored_attrs = {'name', 'tileIndex'} +iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', + 'calendar', 'leap_month', 'leap_year', 'month_lengths', + 'coordinates', 'grid_mapping', 'climatology', + 'cell_methods', 'formula_terms', 'compress', + 'missing_value', 'add_offset', 'scale_factor', + 'valid_max', 'valid_min', 'valid_range', '_FillValue'} +cell_methods_strings = {'point', 'sum', 'maximum', 'median', 'mid_range', + 'minimum', 'mean', 'mode', 'standard_deviation', + 'variance'} + + +def encode(var): + return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) + + +def _filter_attrs(attrs, ignored_attrs): + """ Return attrs that are not in ignored_attrs + """ + return dict((k, v) for k, v in attrs.items() if k not in ignored_attrs) def from_cdms2(variable): """Convert a cdms2 variable into an DataArray """ - def get_cdms2_attrs(var): - return dict((k, v) for k, v in var.attributes.items() - if k not in ignored_attrs) - values = np.asarray(variable) name = variable.id - coords = [(v.id, np.asarray(v), get_cdms2_attrs(v)) + coords = [(v.id, np.asarray(v), + _filter_attrs(v.attributes, cdms2_ignored_attrs)) for v in variable.getAxisList()] - attrs = get_cdms2_attrs(variable) + attrs = _filter_attrs(variable.attributes, cdms2_ignored_attrs) dataarray = DataArray(values, coords=coords, name=name, attrs=attrs) return decode_cf(dataarray.to_dataset())[dataarray.name] @@ -35,9 +53,6 @@ def to_cdms2(dataarray): # we don't want cdms2 to be a hard dependency import cdms2 - def encode(var): - return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) - def set_cdms2_attrs(var, attrs): for k, v in attrs.items(): setattr(var, k, v) @@ -53,3 +68,150 @@ def set_cdms2_attrs(var, attrs): cdms2_var = cdms2.createVariable(var.values, axes=axes, id=dataarray.name) set_cdms2_attrs(cdms2_var, var.attrs) return cdms2_var + + +def _pick_attrs(attrs, keys): + """ Return attrs with keys in keys list + """ + return dict((k, v) for k, v in attrs.items() if k in keys) + + +def _get_iris_args(attrs): + """ Converts the xarray attrs into args that can be passed into Iris + """ + # iris.unit is deprecated in Iris v1.9 + import cf_units + args = {'attributes': _filter_attrs(attrs, iris_forbidden_keys)} + args.update(_pick_attrs(attrs, ('standard_name', 'long_name',))) + unit_args = _pick_attrs(attrs, ('calendar',)) + if 'units' in attrs: + args['units'] = cf_units.Unit(attrs['units'], **unit_args) + return args + + +# TODO: Add converting bounds from xarray to Iris and back +def to_iris(dataarray): + """ Convert a DataArray into a Iris Cube + """ + # Iris not a hard dependency + import iris + from iris.fileformats.netcdf import parse_cell_methods + from xarray.core.pycompat import dask_array_type + + dim_coords = [] + aux_coords = [] + + for coord_name in dataarray.coords: + coord = encode(dataarray.coords[coord_name]) + coord_args = _get_iris_args(coord.attrs) + coord_args['var_name'] = coord_name + axis = None + if coord.dims: + axis = dataarray.get_axis_num(coord.dims) + if coord_name in dataarray.dims: + iris_coord = iris.coords.DimCoord(coord.values, **coord_args) + dim_coords.append((iris_coord, axis)) + else: + iris_coord = iris.coords.AuxCoord(coord.values, **coord_args) + aux_coords.append((iris_coord, axis)) + + args = _get_iris_args(dataarray.attrs) + args['var_name'] = dataarray.name + args['dim_coords_and_dims'] = dim_coords + args['aux_coords_and_dims'] = aux_coords + if 'cell_methods' in dataarray.attrs: + args['cell_methods'] = \ + parse_cell_methods(dataarray.attrs['cell_methods']) + + # Create the right type of masked array (should be easier after #1769) + if isinstance(dataarray.data, dask_array_type): + from dask.array import ma as dask_ma + masked_data = dask_ma.masked_invalid(dataarray) + else: + masked_data = np.ma.masked_invalid(dataarray) + + cube = iris.cube.Cube(masked_data, **args) + + return cube + + +def _iris_obj_to_attrs(obj): + """ Return a dictionary of attrs when given a Iris object + """ + attrs = {'standard_name': obj.standard_name, + 'long_name': obj.long_name} + if obj.units.calendar: + attrs['calendar'] = obj.units.calendar + if obj.units.origin != '1': + attrs['units'] = obj.units.origin + attrs.update(obj.attributes) + return dict((k, v) for k, v in attrs.items() if v is not None) + + +def _iris_cell_methods_to_str(cell_methods_obj): + """ Converts a Iris cell methods into a string + """ + cell_methods = [] + for cell_method in cell_methods_obj: + names = ''.join(['{}: '.format(n) for n in cell_method.coord_names]) + intervals = ' '.join(['interval: {}'.format(interval) + for interval in cell_method.intervals]) + comments = ' '.join(['comment: {}'.format(comment) + for comment in cell_method.comments]) + extra = ' '.join([intervals, comments]).strip() + if extra: + extra = ' ({})'.format(extra) + cell_methods.append(names + cell_method.method + extra) + return ' '.join(cell_methods) + + +def from_iris(cube): + """ Convert a Iris cube into an DataArray + """ + import iris.exceptions + from xarray.core.pycompat import dask_array_type + + name = cube.var_name + dims = [] + for i in range(cube.ndim): + try: + dim_coord = cube.coord(dim_coords=True, dimensions=(i,)) + dims.append(dim_coord.var_name) + except iris.exceptions.CoordinateNotFoundError: + dims.append("dim_{}".format(i)) + + coords = OrderedDict() + + for coord in cube.coords(): + coord_attrs = _iris_obj_to_attrs(coord) + coord_dims = [dims[i] for i in cube.coord_dims(coord)] + if not coord.var_name: + raise ValueError("Coordinate '{}' has no " + "var_name attribute".format(coord.name())) + if coord_dims: + coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) + else: + coords[coord.var_name] = ((), + np.asscalar(coord.points), coord_attrs) + + array_attrs = _iris_obj_to_attrs(cube) + cell_methods = _iris_cell_methods_to_str(cube.cell_methods) + if cell_methods: + array_attrs['cell_methods'] = cell_methods + + # Deal with iris 1.* and 2.* + cube_data = cube.core_data() if hasattr(cube, 'core_data') else cube.data + + # Deal with dask and numpy masked arrays + if isinstance(cube_data, dask_array_type): + from dask.array import ma as dask_ma + filled_data = dask_ma.filled(cube_data, get_fill_value(cube.dtype)) + elif isinstance(cube_data, np.ma.MaskedArray): + filled_data = np.ma.filled(cube_data, get_fill_value(cube.dtype)) + else: + filled_data = cube_data + + dataarray = DataArray(filled_data, coords=coords, name=name, + attrs=array_attrs, dims=dims) + decoded_ds = decode_cf(dataarray._to_temp_dataset()) + return dataarray._from_temp_dataset(decoded_ds) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4ab5136d071..170fbb98822 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1539,6 +1539,19 @@ def from_cdms2(cls, variable): from ..convert import from_cdms2 return from_cdms2(variable) + def to_iris(self): + """Convert this array into a iris.cube.Cube + """ + from ..convert import to_iris + return to_iris(self) + + @classmethod + def from_iris(cls, cube): + """Convert a iris.cube.Cube into an xarray.DataArray + """ + from ..convert import from_iris + return from_iris(cube) + def _all_compat(self, other, compat_str): """Helper function for equals and identical""" diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2a14742c948..0399002c6e6 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -15,7 +15,7 @@ IndexVariable, Variable) from xarray.core.pycompat import iteritems, OrderedDict from xarray.core.common import full_like - +from xarray.conventions import maybe_encode_datetime from xarray.tests import ( TestCase, ReturnItem, source_ndarray, unittest, requires_dask, assert_identical, assert_equal, assert_allclose, assert_array_equal, @@ -2849,6 +2849,150 @@ def test_to_and_from_cdms2(self): roundtripped = DataArray.from_cdms2(actual) self.assertDataArrayIdentical(original, roundtripped) + def test_to_and_from_iris(self): + try: + import iris + import cf_units + except ImportError: + raise unittest.SkipTest('iris not installed') + + coord_dict = OrderedDict() + coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) + coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) + coord_dict['height'] = 10 + coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) + coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) + + original = DataArray(np.arange(6, dtype='float').reshape(2, 3), + coord_dict, name='Temperature', + attrs={'baz': 123, 'units': 'Kelvin', + 'standard_name': 'fire_temperature', + 'long_name': 'Fire Temperature'}, + dims=('distance', 'time')) + + # Set a bad value to test the masking logic + original.data[0, 2] = np.NaN + + original.attrs['cell_methods'] = \ + 'height: mean (comment: A cell method)' + actual = original.to_iris() + self.assertArrayEqual(actual.data, original.data) + self.assertEqual(actual.var_name, original.name) + self.assertItemsEqual([d.var_name for d in actual.dim_coords], + original.dims) + self.assertEqual(actual.cell_methods, + (iris.coords.CellMethod(method='mean', + coords=('height',), + intervals=(), + comments=('A cell method',)),) + ) + + for coord, orginal_key in zip((actual.coords()), original.coords): + original_coord = original.coords[orginal_key] + self.assertEqual(coord.var_name, original_coord.name) + self.assertArrayEqual(coord.points, + maybe_encode_datetime(original_coord).values) + self.assertEqual(actual.coord_dims(coord), + original.get_axis_num + (original.coords[coord.var_name].dims)) + + self.assertEqual(actual.coord('distance2').attributes['foo'], + original.coords['distance2'].attrs['foo']) + self.assertEqual(actual.coord('distance').units, + cf_units.Unit(original.coords['distance'].units)) + self.assertEqual(actual.attributes['baz'], original.attrs['baz']) + self.assertEqual(actual.standard_name, original.attrs['standard_name']) + + roundtripped = DataArray.from_iris(actual) + self.assertDataArrayIdentical(original, roundtripped) + + actual.remove_coord('time') + auto_time_dimension = DataArray.from_iris(actual) + self.assertEqual(auto_time_dimension.dims, ('distance', 'dim_1')) + + actual.coord('distance').var_name = None + with raises_regex(ValueError, 'no var_name attribute'): + DataArray.from_iris(actual) + + @requires_dask + def test_to_and_from_iris_dask(self): + import dask.array as da + try: + import iris + import cf_units + except ImportError: + raise unittest.SkipTest('iris not installed') + + coord_dict = OrderedDict() + coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) + coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) + coord_dict['height'] = 10 + coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) + coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) + + original = DataArray(da.from_array( + np.arange(-1, 5, dtype='float').reshape(2, 3), 3), coord_dict, + name='Temperature', + attrs={'baz': 123, 'units': 'Kelvin', + 'standard_name': 'fire_temperature', + 'long_name': 'Fire Temperature'}, + dims=('distance', 'time')) + + # Set a bad value to test the masking logic + original.data = da.ma.masked_less(original.data, 0) + + original.attrs['cell_methods'] = \ + 'height: mean (comment: A cell method)' + actual = original.to_iris() + + # Be careful not to trigger the loading of the iris data + actual_data = actual.core_data() if \ + hasattr(actual, 'core_data') else actual.data + self.assertArrayEqual(actual_data, original.data) + self.assertEqual(actual.var_name, original.name) + self.assertItemsEqual([d.var_name for d in actual.dim_coords], + original.dims) + self.assertEqual(actual.cell_methods, + (iris.coords.CellMethod(method='mean', + coords=('height',), + intervals=(), + comments=('A cell method',)),) + ) + + for coord, orginal_key in zip((actual.coords()), original.coords): + original_coord = original.coords[orginal_key] + self.assertEqual(coord.var_name, original_coord.name) + self.assertArrayEqual(coord.points, + maybe_encode_datetime(original_coord).values) + self.assertEqual(actual.coord_dims(coord), + original.get_axis_num + (original.coords[coord.var_name].dims)) + + self.assertEqual(actual.coord('distance2').attributes['foo'], + original.coords['distance2'].attrs['foo']) + self.assertEqual(actual.coord('distance').units, + cf_units.Unit(original.coords['distance'].units)) + self.assertEqual(actual.attributes['baz'], original.attrs['baz']) + self.assertEqual(actual.standard_name, original.attrs['standard_name']) + + roundtripped = DataArray.from_iris(actual) + self.assertDataArrayIdentical(original, roundtripped) + + # If the Iris version supports it then we should get a dask array back + if hasattr(actual, 'core_data'): + pass + # TODO This currently fails due to the decoding loading + # the data (#1372) + # self.assertEqual(type(original.data), type(roundtripped.data)) + + actual.remove_coord('time') + auto_time_dimension = DataArray.from_iris(actual) + self.assertEqual(auto_time_dimension.dims, ('distance', 'dim_1')) + + actual.coord('distance').var_name = None + with raises_regex(ValueError, 'no var_name attribute'): + DataArray.from_iris(actual) + def test_to_dataset_whole(self): unnamed = DataArray([1, 2], dims='x') with raises_regex(ValueError, 'unable to convert unnamed'):