Skip to content

Commit 1a01208

Browse files
benbovy authored and Joe Hamman committed
Fix unexpected behavior of .set_index() since pandas 0.21.0 (#1723)
* fix set_index behavior using pandas 0.21.0
* review comments
1 parent 8267fdb commit 1a01208

File tree

3 files changed

+36
-10
lines changed

3 files changed

+36
-10
lines changed

doc/whats-new.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,19 @@ What's New
1515
1616
.. _whats-new.0.10.0:
1717

18+
v0.10.0 (unreleased)
19+
--------------------
20+
21+
Bug fixes
22+
~~~~~~~~~
23+
24+
- Fixed unexpected behavior in ``Dataset.set_index()`` and
25+
``DataArray.set_index()`` introduced by Pandas 0.21.0. Setting a new
26+
index with a single variable resulted in a 1-level
27+
``pandas.MultiIndex`` instead of a simple ``pandas.Index``
28+
(:issue:`1722`). By `Benoit Bovy <https://github.com/benbovy>`_.
29+
30+
1831
v0.10.0 rc2 (13 November 2017)
1932
------------------------------
2033

xarray/core/dataset.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@ def merge_indexes(
136136
names, labels, levels = [], [], []
137137
current_index_variable = variables.get(dim)
138138

139+
for n in var_names:
140+
var = variables[n]
141+
if (current_index_variable is not None and
142+
var.dims != current_index_variable.dims):
143+
raise ValueError(
144+
"dimension mismatch between %r %s and %r %s"
145+
% (dim, current_index_variable.dims, n, var.dims))
146+
139147
if current_index_variable is not None and append:
140148
current_index = current_index_variable.to_index()
141149
if isinstance(current_index, pd.MultiIndex):
@@ -148,20 +156,19 @@ def merge_indexes(
148156
labels.append(cat.codes)
149157
levels.append(cat.categories)
150158

151-
for n in var_names:
152-
names.append(n)
153-
var = variables[n]
154-
if ((current_index_variable is not None) and
155-
(var.dims != current_index_variable.dims)):
156-
raise ValueError(
157-
"dimension mismatch between %r %s and %r %s"
158-
% (dim, current_index_variable.dims, n, var.dims))
159-
else:
159+
if not len(names) and len(var_names) == 1:
160+
idx = pd.Index(variables[var_names[0]].values)
161+
162+
else:
163+
for n in var_names:
164+
names.append(n)
165+
var = variables[n]
160166
cat = pd.Categorical(var.values, ordered=True)
161167
labels.append(cat.codes)
162168
levels.append(cat.categories)
163169

164-
idx = pd.MultiIndex(labels=labels, levels=levels, names=names)
170+
idx = pd.MultiIndex(labels=labels, levels=levels, names=names)
171+
165172
vars_to_replace[dim] = IndexVariable(dim, idx)
166173
vars_to_remove.extend(var_names)
167174

xarray/tests/test_dataset.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,6 +2007,12 @@ def test_set_index(self):
20072007
ds.set_index(x=mindex.names, inplace=True)
20082008
self.assertDatasetIdentical(ds, expected)
20092009

2010+
# ensure set_index with no existing index and a single data var given
2011+
# doesn't return multi-index
2012+
ds = Dataset(data_vars={'x_var': ('x', [0, 1, 2])})
2013+
expected = Dataset(coords={'x': [0, 1, 2]})
2014+
self.assertDataArrayIdentical(ds.set_index(x='x_var'), expected)
2015+
20102016
def test_reset_index(self):
20112017
ds = create_test_multiindex()
20122018
mindex = ds['x'].to_index()

0 commit comments

Comments
 (0)