diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8443212f13c..7ed483b1a6d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,9 +26,14 @@ Enhancements Bug fixes ~~~~~~~~~ + - Fix issues for dates outside the valid range of pandas timestamps (:issue:`975`). By `Mathias Hauser `_. +- Unstacking produced flipped array after stacking decreasing coordinate values + (:issue:`980`). + By `Stephan Hoyer `_. + .. _whats-new.0.8.2: v0.8.2 (18 August 2016) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1176f1ce466..c93c3c5515e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1324,8 +1324,8 @@ def _stack_once(self, dims, new_dim): else: variables[name] = var.copy(deep=False) - idx = pd.MultiIndex.from_product([self.indexes[d] for d in dims], - names=dims) + idx = utils.multiindex_from_product_levels( + [self.indexes[d] for d in dims], names=dims) variables[new_dim] = Coordinate(new_dim, idx) coord_names = set(self._coord_names) - set(dims) | set([new_dim]) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index c58f14d2f2a..d314b3053d4 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -59,6 +59,29 @@ def safe_cast_to_index(array): return index +def multiindex_from_product_levels(levels, names=None): + """Creating a MultiIndex from a product without refactorizing levels. + + Keeping levels the same is faster, and also gives back the original labels + when we unstack. + + Parameters + ---------- + levels : sequence of arrays + Unique labels for each level. + names : optional sequence of objects + Names for each level. + + Returns + ------- + pandas.MultiIndex + """ + labels_mesh = np.meshgrid(*[np.arange(len(lev)) for lev in levels], + indexing='ij') + labels = [x.ravel() for x in labels_mesh] + return pd.MultiIndex(levels, labels, sortorder=0, names=names) + + def maybe_wrap_array(original, new_array): """Wrap a transformed array with __array_wrap__ is it can be done safely. diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 892ef337e44..bcfa6816cc4 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -993,6 +993,15 @@ def test_stack_unstack(self): actual = orig.stack(z=['x', 'y']).unstack('z') self.assertDataArrayIdentical(orig, actual) + def test_stack_unstack_decreasing_coordinate(self): + # regression test for GH980 + orig = DataArray(np.random.rand(3, 4), dims=('y', 'x'), + coords={'x': np.arange(4), + 'y': np.arange(3, 0, -1)}) + stacked = orig.stack(allpoints=['y', 'x']) + actual = stacked.unstack('allpoints') + self.assertDataArrayIdentical(orig, actual) + def test_unstack_pandas_consistency(self): df = pd.DataFrame({'foo': range(3), 'x': ['a', 'b', 'b'], @@ -1628,7 +1637,7 @@ def test_align_dtype(self): def test_align_copy(self): x = DataArray([1, 2, 3], coords=[('a', [1, 2, 3])]) y = DataArray([1, 2], coords=[('a', [3, 1])]) - + expected_x2 = x expected_y2 = DataArray([2, np.nan, 1], coords=[('a', [1, 2, 3])]) @@ -1636,7 +1645,7 @@ def test_align_copy(self): self.assertDataArrayIdentical(expected_x2, x2) self.assertDataArrayIdentical(expected_y2, y2) assert source_ndarray(x2.data) is source_ndarray(x.data) - + x2, y2 = align(x, y, join='outer', copy=True) self.assertDataArrayIdentical(expected_x2, x2) self.assertDataArrayIdentical(expected_y2, y2) @@ -1647,23 +1656,28 @@ def test_align_copy(self): x2, = align(x, copy=False) self.assertDataArrayIdentical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) - + x2, = align(x, copy=True) self.assertDataArrayIdentical(x, x2) assert source_ndarray(x2.data) is not source_ndarray(x.data) def test_align_exclude(self): - x = DataArray([[1, 2], [3, 4]], coords=[('a', [-1, -2]), ('b', [3, 4])]) - y = DataArray([[1, 2], [3, 4]], coords=[('a', [-1, 20]), ('b', [5, 6])]) + x = DataArray([[1, 2], [3, 4]], + coords=[('a', [-1, -2]), ('b', [3, 4])]) + y = DataArray([[1, 2], [3, 4]], + coords=[('a', [-1, 20]), ('b', [5, 6])]) z = DataArray([1], dims=['a'], coords={'a': [20], 'b': 7}) - + x2, y2, z2 = align(x, y, z, join='outer', exclude=['b']) - expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], coords=[('a', [-2, -1, 20]), ('b', [3, 4])]) - expected_y2 = DataArray([[np.nan, np.nan], [1, 2], [3, 4]], coords=[('a', [-2, -1, 20]), ('b', [5, 6])]) - expected_z2 = DataArray([np.nan, np.nan, 1], dims=['a'], coords={'a': [-2, -1, 20], 'b': 7}) + expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], + coords=[('a', [-2, -1, 20]), ('b', [3, 4])]) + expected_y2 = DataArray([[np.nan, np.nan], [1, 2], [3, 4]], + coords=[('a', [-2, -1, 20]), ('b', [5, 6])]) + expected_z2 = DataArray([np.nan, np.nan, 1], dims=['a'], + coords={'a': [-2, -1, 20], 'b': 7}) self.assertDataArrayIdentical(expected_x2, x2) self.assertDataArrayIdentical(expected_y2, y2) - self.assertDataArrayIdentical(expected_z2, z2) + self.assertDataArrayIdentical(expected_z2, z2) def test_align_indexes(self): x = DataArray([1, 2, 3], coords=[('a', [-1, 10, -2])]) @@ -1676,7 +1690,8 @@ def test_align_indexes(self): self.assertDataArrayIdentical(expected_y2, y2) x2, = align(x, join='outer', indexes={'a': [-2, 7, 10, -1]}) - expected_x2 = DataArray([3, np.nan, 2, 1], coords=[('a', [-2, 7, 10, -1])]) + expected_x2 = DataArray([3, np.nan, 2, 1], + coords=[('a', [-2, 7, 10, -1])]) self.assertDataArrayIdentical(expected_x2, x2) def test_broadcast_arrays(self): @@ -1699,10 +1714,13 @@ def test_broadcast_arrays(self): def test_broadcast_arrays_misaligned(self): # broadcast on misaligned coords must auto-align - x = DataArray([[1, 2], [3, 4]], coords=[('a', [-1, -2]), ('b', [3, 4])]) + x = DataArray([[1, 2], [3, 4]], + coords=[('a', [-1, -2]), ('b', [3, 4])]) y = DataArray([1, 2], coords=[('a', [-1, 20])]) - expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], coords=[('a', [-2, -1, 20]), ('b', [3, 4])]) - expected_y2 = DataArray([[np.nan, np.nan], [1, 1], [2, 2]], coords=[('a', [-2, -1, 20]), ('b', [3, 4])]) + expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], + coords=[('a', [-2, -1, 20]), ('b', [3, 4])]) + expected_y2 = DataArray([[np.nan, np.nan], [1, 1], [2, 2]], + coords=[('a', [-2, -1, 20]), ('b', [3, 4])]) x2, y2 = broadcast(x, y) self.assertDataArrayIdentical(expected_x2, x2) self.assertDataArrayIdentical(expected_y2, y2) @@ -1718,21 +1736,24 @@ def test_broadcast_arrays_nocopy(self): self.assertDataArrayIdentical(expected_x2, x2) self.assertDataArrayIdentical(expected_y2, y2) assert source_ndarray(x2.data) is source_ndarray(x.data) - + # single-element broadcast (trivial case) x2, = broadcast(x) self.assertDataArrayIdentical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) def test_broadcast_arrays_exclude(self): - x = DataArray([[1, 2], [3, 4]], coords=[('a', [-1, -2]), ('b', [3, 4])]) + x = DataArray([[1, 2], [3, 4]], + coords=[('a', [-1, -2]), ('b', [3, 4])]) y = DataArray([1, 2], coords=[('a', [-1, 20])]) z = DataArray(5, coords={'b': 5}) - + x2, y2, z2 = broadcast(x, y, z, exclude=['b']) - expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], coords=[('a', [-2, -1, 20]), ('b', [3, 4])]) + expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], + coords=[('a', [-2, -1, 20]), ('b', [3, 4])]) expected_y2 = DataArray([np.nan, 1, 2], coords=[('a', [-2, -1, 20])]) - expected_z2 = DataArray([5, 5, 5], dims=['a'], coords={'a': [-2, -1, 20], 'b': 5}) + expected_z2 = DataArray([5, 5, 5], dims=['a'], + coords={'a': [-2, -1, 20], 'b': 5}) self.assertDataArrayIdentical(expected_x2, x2) self.assertDataArrayIdentical(expected_y2, y2) self.assertDataArrayIdentical(expected_z2, z2) diff --git a/xarray/test/test_utils.py b/xarray/test/test_utils.py index 1b0ddfce3e2..83ce59c5c44 100644 --- a/xarray/test/test_utils.py +++ b/xarray/test/test_utils.py @@ -22,6 +22,17 @@ def test(self): self.assertEqual(expected.dtype, actual.dtype) +def test_multiindex_from_product_levels(): + result = utils.multiindex_from_product_levels([['b', 'a'], [1, 3, 2]]) + np.testing.assert_array_equal( + result.labels, [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + np.testing.assert_array_equal(result.levels[0], ['b', 'a']) + np.testing.assert_array_equal(result.levels[1], [1, 3, 2]) + + other = pd.MultiIndex.from_product([['b', 'a'], [1, 3, 2]]) + np.testing.assert_array_equal(result.values, other.values) + + class TestArrayEquiv(TestCase): def test_0d(self): # verify our work around for pd.isnull not working for 0-dimensional