Skip to content

concat prealigned objects #1413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 31 additions & 22 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@


def concat(objs, dim=None, data_vars='all', coords='different',
compat='equals', positions=None, indexers=None, mode=None,
concat_over=None):
compat='equals', positions=None, prealigned=False,
indexers=None,mode=None, concat_over=None):
"""Concatenate xarray objects along a new or existing dimension.

Parameters
Expand Down Expand Up @@ -66,6 +66,10 @@ def concat(objs, dim=None, data_vars='all', coords='different',
List of integer arrays which specifies the integer positions to which
to assign each dataset along the concatenated dimension. If not
supplied, objects are concatenated in the provided order.
prealigned : bool, optional
If True, the objects will be assumed to be already aligned. Coordinates
will be taken from the first object and ignored from the subsequent
objects.
indexers, mode, concat_over : deprecated

Returns
Expand Down Expand Up @@ -117,7 +121,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
else:
raise TypeError('can only concatenate xarray Dataset and DataArray '
'objects, got %s' % type(first_obj))
return f(objs, dim, data_vars, coords, compat, positions)
return f(objs, dim, data_vars, coords, compat, positions, prealigned)


def _calc_concat_dim_coord(dim):
Expand Down Expand Up @@ -195,7 +199,8 @@ def differs(vname):
return concat_over


def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
prealigned):
"""
Concatenate a sequence of datasets along a new or existing dimension
"""
Expand All @@ -207,7 +212,10 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):

dim, coord = _calc_concat_dim_coord(dim)
datasets = [as_dataset(ds) for ds in datasets]
datasets = align(*datasets, join='outer', copy=False, exclude=[dim])
if not prealigned:
datasets = align(*datasets, join='outer', copy=False, exclude=[dim])
else:
coords = 'minimal'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's bad form to unilaterally override an argument with another value -- it's better to raise an error (or maybe a warning).

The only value of coords that really breaks here is 'different', and even that value could conceivably make sense.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about just adding the option coords='prealigned'?

My initial thought was that, for prealigned data, all coords should just be drawn from the first object. But on second thought, what if there are other coords in the later datasets that do need to be concatenated, e.g. when concatenating over time with an auxiliary coordinate iteration_number that has dimension time?

It definitely doesn't work with coords='different'. I have not tried all the other options. I have a hard time conceptualizing what the different coords options do. Some guidance would be very welcome. I don't really understand what the function _calc_concat_over does.


concat_over = _calc_concat_over(datasets, dim, data_vars, coords)

Expand All @@ -228,21 +236,22 @@ def insert_result_variable(k, v):

# check that global attributes and non-concatenated variables are fixed
# across all datasets
for ds in datasets[1:]:
if (compat == 'identical' and
not utils.dict_equiv(ds.attrs, result_attrs)):
raise ValueError('dataset global attributes not equal')
for k, v in iteritems(ds.variables):
if k not in result_vars and k not in concat_over:
raise ValueError('encountered unexpected variable %r' % k)
elif (k in result_coord_names) != (k in ds.coords):
raise ValueError('%r is a coordinate in some datasets but not '
'others' % k)
elif (k in result_vars and k != dim and
not getattr(v, compat)(result_vars[k])):
verb = 'equal' if compat == 'equals' else compat
raise ValueError(
'variable %r not %s across datasets' % (k, verb))
if not prealigned:
for ds in datasets[1:]:
if (compat == 'identical' and
not utils.dict_equiv(ds.attrs, result_attrs)):
raise ValueError('dataset global attributes not equal')
for k, v in iteritems(ds.variables):
if k not in result_vars and k not in concat_over:
raise ValueError('encountered unexpected variable %r' % k)
elif (k in result_coord_names) != (k in ds.coords):
raise ValueError('%r is a coordinate in some datasets but not '
'others' % k)
elif (k in result_vars and k != dim and
not getattr(v, compat)(result_vars[k])):
verb = 'equal' if compat == 'equals' else compat
raise ValueError(
'variable %r not %s across datasets' % (k, verb))

# we've already verified everything is consistent; now, calculate
# shared dimension sizes so we can expand the necessary variables
Expand Down Expand Up @@ -284,7 +293,7 @@ def ensure_common_dims(vars):


def _dataarray_concat(arrays, dim, data_vars, coords, compat,
positions):
positions, prealigned):
arrays = list(arrays)

if data_vars != 'all':
Expand All @@ -303,7 +312,7 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
datasets.append(arr._to_temp_dataset())

ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
positions)
positions, prealigned)
return arrays[0]._from_temp_dataset(ds, name)


Expand Down
24 changes: 24 additions & 0 deletions xarray/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,30 @@ def test_concat_autoalign(self):
coords={'x': [1, 2, 3]})})
self.assertDatasetIdentical(expected, actual)

def test_concat_prealigned(self):
# concat over new dimension
ds1 = Dataset({'foo': (['x'], [1, 2])},
coords={'x': (['x'], [1, 2]), 'z': (['x'], ['a', 'b'])})
ds2 = Dataset({'foo': (['x'], [1, 2])},
coords={'x': (['x'], [1, 3]), 'z': (['x'], ['f', 'g'])})
actual = concat([ds1, ds2], 'y', prealigned=True)
# the concatenated dataset should just ignore all coords in ds2 and only
# concat data variables, regardless of whether they are the same
expected = Dataset({'foo': (['y', 'x'], [[1, 2], [1, 2]])},
coords=ds1.coords)
self.assertDatasetIdentical(expected, actual)

# concat over existing dimension
data = create_test_data()
for k in list(data):
if 'dim3' in data[k].dims:
del data[k]

split_data = [data.isel(dim1=slice(3)),
data.isel(dim1=slice(3, None))]
concat_data = concat(split_data, 'dim1', prealigned=True)
self.assertDatasetIdentical(data, concat_data)

def test_concat_errors(self):
data = create_test_data()
split_data = [data.isel(dim1=slice(3)),
Expand Down