Skip to content

Commit 4f5ca73

Browse files
dcherian authored and Joe Hamman committed
Make concat more forgiving with variables that are being merged. (#3364)
* Make concat more forgiving with variables that are being merged.
* rename test.
* simplify test.
* make diff smaller.
1 parent ae1d8c7 commit 4f5ca73

File tree

4 files changed: +24 −18 lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ Bug fixes
8989
- Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord
9090
now plot the correct data for 2D DataArrays
9191
(:issue:`3334`). By `Tom Nicholas <http://github.com/TomNicholas>`_.
92+
- Make :py:func:`~xarray.concat` more robust when merging variables present in some datasets but
93+
not others (:issue:`508`). By `Deepak Cherian <http://github.com/dcherian>`_.
9294
- The default behaviour of reducing across all dimensions for
9395
:py:class:`~xarray.core.groupby.DataArrayGroupBy` objects has now been properly removed
9496
as was done for :py:class:`~xarray.core.groupby.DatasetGroupBy` in 0.13.0 (:issue:`3337`).

xarray/core/concat.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -312,15 +312,9 @@ def _dataset_concat(
312312
to_merge = {var: [] for var in variables_to_merge}
313313

314314
for ds in datasets:
315-
absent_merge_vars = variables_to_merge - set(ds.variables)
316-
if absent_merge_vars:
317-
raise ValueError(
318-
"variables %r are present in some datasets but not others. "
319-
% absent_merge_vars
320-
)
321-
322315
for var in variables_to_merge:
323-
to_merge[var].append(ds.variables[var])
316+
if var in ds:
317+
to_merge[var].append(ds.variables[var])
324318

325319
for var in variables_to_merge:
326320
result_vars[var] = unique_variable(

xarray/tests/test_combine.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -782,12 +782,11 @@ def test_auto_combine_previously_failed(self):
782782
actual = auto_combine(datasets, concat_dim="t")
783783
assert_identical(expected, actual)
784784

785-
def test_auto_combine_still_fails(self):
786-
# concat can't handle new variables (yet):
787-
# https://github.com/pydata/xarray/issues/508
785+
def test_auto_combine_with_new_variables(self):
788786
datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})]
789-
with pytest.raises(ValueError):
790-
auto_combine(datasets, "y")
787+
actual = auto_combine(datasets, "y")
788+
expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1})
789+
assert_identical(expected, actual)
791790

792791
def test_auto_combine_no_concat(self):
793792
objs = [Dataset({"x": 0}), Dataset({"y": 1})]

xarray/tests/test_concat.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,22 @@ def test_concat_simple(self, data, dim, coords):
6868
datasets = [g for _, g in data.groupby(dim, squeeze=False)]
6969
assert_identical(data, concat(datasets, dim, coords=coords))
7070

71+
def test_concat_merge_variables_present_in_some_datasets(self, data):
72+
# coordinates present in some datasets but not others
73+
ds1 = Dataset(data_vars={"a": ("y", [0.1])}, coords={"x": 0.1})
74+
ds2 = Dataset(data_vars={"a": ("y", [0.2])}, coords={"z": 0.2})
75+
actual = concat([ds1, ds2], dim="y", coords="minimal")
76+
expected = Dataset({"a": ("y", [0.1, 0.2])}, coords={"x": 0.1, "z": 0.2})
77+
assert_identical(expected, actual)
78+
79+
# data variables present in some datasets but not others
80+
split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))]
81+
data0, data1 = deepcopy(split_data)
82+
data1["foo"] = ("bar", np.random.randn(10))
83+
actual = concat([data0, data1], "dim1")
84+
expected = data.copy().assign(foo=data1.foo)
85+
assert_identical(expected, actual)
86+
7187
def test_concat_2(self, data):
7288
dim = "dim2"
7389
datasets = [g for _, g in data.groupby(dim, squeeze=True)]
@@ -190,11 +206,6 @@ def test_concat_errors(self):
190206
concat([data0, data1], "dim1", compat="identical")
191207
assert_identical(data, concat([data0, data1], "dim1", compat="equals"))
192208

193-
with raises_regex(ValueError, "present in some datasets"):
194-
data0, data1 = deepcopy(split_data)
195-
data1["foo"] = ("bar", np.random.randn(10))
196-
concat([data0, data1], "dim1")
197-
198209
with raises_regex(ValueError, "compat.* invalid"):
199210
concat(split_data, "dim1", compat="foobar")
200211

0 commit comments

Comments
 (0)