Skip to content

cube.aggregated_by and multidimensional auxcoords #3174

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
* :meth:`iris.cube.Cube.aggregated_by` now computes correct points and bounds
for multidimensional coordinates that span the aggregated dimension.
79 changes: 52 additions & 27 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,9 +1859,10 @@ def __init__(self, groupby_coords, shared_coords=None):

Kwargs:

* shared_coords (list of :class:`iris.coords.Coord` instances):
One or more coordinates that share the same group-by
coordinate axis.
* shared_coords (list of (:class:`iris.coords.Coord`, `int`) pairs):
One or more coordinates (including multidimensional coordinates)
that share the same group-by coordinate axis. The `int` identifies
which dimension of the coord is on the group-by coordinate axis.

"""
#: Group-by and shared coordinates that have been grouped.
Expand All @@ -1886,8 +1887,8 @@ def __init__(self, groupby_coords, shared_coords=None):
raise TypeError('shared_coords must be a '
'`collections.Iterable` type.')
# Add valid shared coordinates.
for coord in shared_coords:
self._add_shared_coord(coord)
for coord, dim in shared_coords:
self._add_shared_coord(coord, dim)

def _add_groupby_coord(self, coord):
if coord.ndim != 1:
Expand All @@ -1898,12 +1899,10 @@ def _add_groupby_coord(self, coord):
raise ValueError('Group-by coordinates have different lengths.')
self._groupby_coords.append(coord)

def _add_shared_coord(self, coord):
if coord.ndim != 1:
raise iris.exceptions.CoordinateMultiDimError(coord)
if coord.shape[0] != self._stop and self._stop is not None:
def _add_shared_coord(self, coord, dim):
if coord.shape[dim] != self._stop and self._stop is not None:
raise ValueError('Shared coordinates have different lengths.')
self._shared_coords.append(coord)
self._shared_coords.append((coord, dim))

def group(self):
"""
Expand Down Expand Up @@ -2030,18 +2029,36 @@ def _compute_shared_coords(self):
groupby_bounds.append((key_slice.start, key_slice.stop-1))

# Create new shared bounded coordinates.
for coord in self._shared_coords:
for coord, dim in self._shared_coords:
if coord.points.dtype.kind in 'SU':
if coord.bounds is None:
new_points = []
new_bounds = None
# np.apply_along_axis does not work with str.join, so we
# need to loop through the array directly. First move axis
# of interest to trailing dim and flatten the others.
work_arr = np.moveaxis(coord.points, dim, -1)
shape = work_arr.shape
work_shape = (-1, shape[-1])
new_shape = (len(self),)
if coord.ndim > 1:
new_shape += shape[:-1]
work_arr = work_arr.reshape(work_shape)

for key_slice in six.itervalues(self._slices_by_key):
if isinstance(key_slice, slice):
indices = key_slice.indices(coord.points.shape[0])
indices = key_slice.indices(
coord.points.shape[dim])
key_slice = range(*indices)
new_pt = '|'.join([coord.points[i]
for i in key_slice])
new_points.append(new_pt)

for arr in work_arr:
new_points.append('|'.join(arr.take(key_slice)))

# Reinstate flattened dimensions. Aggregated dim now leads.
new_points = np.array(new_points).reshape(new_shape)

# Move aggregated dimension back to position it started in.
new_points = np.moveaxis(new_points, 0, dim)
else:
msg = ('collapsing the bounded string coordinate {0!r}'
' is not supported'.format(coord.name()))
Expand All @@ -2054,27 +2071,35 @@ def _compute_shared_coords(self):
if coord.has_bounds():
# Collapse group bounds into bounds.
if (getattr(coord, 'circular', False) and
(stop + 1) == len(coord.points)):
new_bounds.append([coord.bounds[start, 0],
coord.bounds[0, 0] +
coord.units.modulus])
(stop + 1) == coord.shape[dim]):
new_bounds.append(
[coord.bounds.take(start, dim).take(0, -1),
coord.bounds.take(0, dim).take(0, -1) +
coord.units.modulus])
else:
new_bounds.append([coord.bounds[start, 0],
coord.bounds[stop, 1]])
new_bounds.append(
[coord.bounds.take(start, dim).take(0, -1),
coord.bounds.take(stop, dim).take(1, -1)])
else:
# Collapse group points into bounds.
if (getattr(coord, 'circular', False) and
(stop + 1) == len(coord.points)):
new_bounds.append([coord.points[start],
coord.points[0] +
coord.units.modulus])
new_bounds.append([coord.points.take(start, dim),
coord.points.take(0, dim) +
coord.units.modulus])
else:
new_bounds.append([coord.points[start],
coord.points[stop]])
new_bounds.append([coord.points.take(start, dim),
coord.points.take(stop, dim)])

# The bounds must be an array with the length-2 (start, stop)
# dimension last, and the aggregated dimension back in its
# original position.
new_bounds = np.moveaxis(
np.array(new_bounds), (0, 1), (dim, -1))

# Now create the new bounded group shared coordinate.
try:
new_points = np.array(new_bounds).mean(-1)
new_points = new_bounds.mean(-1)
except TypeError:
msg = 'The {0!r} coordinate on the collapsing dimension' \
' cannot be collapsed.'.format(coord.name())
Expand Down
14 changes: 11 additions & 3 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -3385,10 +3385,18 @@ def aggregated_by(self, coords, aggregator, **kwargs):
# coordinate dimension.
shared_coords = list(filter(
lambda coord_: coord_ not in groupby_coords,
self.coords(dimensions=dimension_to_groupby)))
self.coords(contains_dimension=dimension_to_groupby)))

# Determine which of each shared coord's dimensions will be aggregated.
shared_coords_and_dims = [
(coord_, index)
for coord_ in shared_coords
for (index, dim) in enumerate(self.coord_dims(coord_))
if dim == dimension_to_groupby]

# Create the aggregation group-by instance.
groupby = iris.analysis._Groupby(groupby_coords, shared_coords)
groupby = iris.analysis._Groupby(groupby_coords,
shared_coords_and_dims)

# Create the resulting aggregate-by cube and remove the original
# coordinates that are going to be groupedby.
Expand Down Expand Up @@ -3444,7 +3452,7 @@ def aggregated_by(self, coords, aggregator, **kwargs):
dimension_to_groupby)
else:
aggregateby_cube.add_aux_coord(coord.copy(),
dimension_to_groupby)
self.coord_dims(coord))

# Attach the aggregate-by data into the aggregate-by cube.
aggregateby_cube = aggregator.post_process(aggregateby_cube,
Expand Down
55 changes: 50 additions & 5 deletions lib/iris/tests/unit/cube/test_Cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,24 +449,49 @@ def test_different_array_attrs_incompatible(self):

class Test_aggregated_by(tests.IrisTest):
def setUp(self):
self.cube = Cube(np.arange(11))
self.cube = Cube(np.arange(44).reshape(4, 11))

val_coord = AuxCoord([0, 0, 0, 1, 1, 2, 0, 0, 2, 0, 1],
long_name="val")
label_coord = AuxCoord(['alpha', 'alpha', 'beta',
'beta', 'alpha', 'gamma',
'alpha', 'alpha', 'alpha',
'gamma', 'beta'],
long_name='label', units='no_unit')
self.cube.add_aux_coord(val_coord, 0)
self.cube.add_aux_coord(label_coord, 0)
simple_agg_coord = AuxCoord([1, 1, 2, 2], long_name='simple_agg')
spanning_coord = AuxCoord(np.arange(44).reshape(4, 11),
long_name='spanning')
spanning_label_coord = AuxCoord(
np.arange(1, 441, 10).reshape(4, 11).astype(str),
long_name='span_label', units='no_unit')

self.cube.add_aux_coord(simple_agg_coord, 0)
self.cube.add_aux_coord(val_coord, 1)
self.cube.add_aux_coord(label_coord, 1)
self.cube.add_aux_coord(spanning_coord, (0, 1))
self.cube.add_aux_coord(spanning_label_coord, (0, 1))

self.mock_agg = mock.Mock(spec=Aggregator)
self.mock_agg.cell_method = []
self.mock_agg.aggregate = mock.Mock(
return_value=mock.Mock(dtype='object'))
self.mock_agg.aggregate_shape = mock.Mock(return_value=())
self.mock_agg.post_process = mock.Mock(side_effect=lambda x, y, z: x)

def test_string_coord_agg_by_label(self):
def test_2d_coord_simple_agg(self):
# For 2d coords, slicing the aggregated cube should give the same
# coords as aggregating each slice of the original cube.
res_cube = self.cube.aggregated_by('simple_agg', self.mock_agg)
for res_slice, cube_slice in zip(res_cube.slices('simple_agg'),
self.cube.slices('simple_agg')):
cube_slice_agg = cube_slice.aggregated_by('simple_agg',
self.mock_agg)
self.assertEqual(res_slice.coord('spanning'),
cube_slice_agg.coord('spanning'))
self.assertEqual(res_slice.coord('span_label'),
cube_slice_agg.coord('span_label'))

def test_agg_by_label(self):
# Aggregate a cube on a string coordinate label where label
# and val entries are not in step; the resulting cube has a val
# coord of bounded cells and a label coord of single string entries.
Expand All @@ -479,7 +504,17 @@ def test_string_coord_agg_by_label(self):
self.assertEqual(res_cube.coord('val'), val_coord)
self.assertEqual(res_cube.coord('label'), label_coord)

def test_string_coord_agg_by_val(self):
def test_2d_agg_by_label(self):
res_cube = self.cube.aggregated_by('label', self.mock_agg)
# For the 2d coord, slicing the aggregated cube should give the same
# coord as aggregating each slice of the original cube.
for res_slice, cube_slice in zip(res_cube.slices('val'),
self.cube.slices('val')):
cube_slice_agg = cube_slice.aggregated_by('label', self.mock_agg)
self.assertEqual(res_slice.coord('spanning'),
cube_slice_agg.coord('spanning'))

def test_agg_by_val(self):
# Aggregate a cube on a numeric coordinate val where label
# and val entries are not in step; the resulting cube has a label
# coord with serialised labels from the aggregated cells.
Expand All @@ -493,6 +528,16 @@ def test_string_coord_agg_by_val(self):
self.assertEqual(res_cube.coord('val'), val_coord)
self.assertEqual(res_cube.coord('label'), label_coord)

def test_2d_agg_by_val(self):
res_cube = self.cube.aggregated_by('val', self.mock_agg)
# For the 2d coord, slicing the aggregated cube should give the same
# coord as aggregating each slice of the original cube.
for res_slice, cube_slice in zip(res_cube.slices('val'),
self.cube.slices('val')):
cube_slice_agg = cube_slice.aggregated_by('val', self.mock_agg)
self.assertEqual(res_slice.coord('spanning'),
cube_slice_agg.coord('spanning'))

def test_single_string_aggregation(self):
aux_coords = [(AuxCoord(['a', 'b', 'a'], long_name='foo'), 0),
(AuxCoord(['a', 'a', 'a'], long_name='bar'), 0)]
Expand Down