Skip to content

Fix flipped array after stacking decreasing coordinate values #985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 28, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,14 @@ Enhancements

Bug fixes
~~~~~~~~~

- Fix issues for dates outside the valid range of pandas timestamps
(:issue:`975`). By `Mathias Hauser <https://github.com/mathause>`_.

- Unstacking produced flipped array after stacking decreasing coordinate values
(:issue:`980`).
By `Stephan Hoyer <https://github.com/shoyer>`_.

.. _whats-new.0.8.2:

v0.8.2 (18 August 2016)
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1324,8 +1324,8 @@ def _stack_once(self, dims, new_dim):
else:
variables[name] = var.copy(deep=False)

idx = pd.MultiIndex.from_product([self.indexes[d] for d in dims],
names=dims)
idx = utils.multiindex_from_product_levels(
[self.indexes[d] for d in dims], names=dims)
variables[new_dim] = Coordinate(new_dim, idx)

coord_names = set(self._coord_names) - set(dims) | set([new_dim])
Expand Down
23 changes: 23 additions & 0 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,29 @@ def safe_cast_to_index(array):
return index


def multiindex_from_product_levels(levels, names=None):
    """Build a product MultiIndex that reuses the given levels unchanged.

    The levels are handed to pandas as-is instead of being refactorized,
    which is faster and also gives back the original labels when we unstack.

    Parameters
    ----------
    levels : sequence of arrays
        Unique labels for each level.
    names : optional sequence of objects
        Names for each level.

    Returns
    -------
    pandas.MultiIndex
    """
    positions = [np.arange(len(level)) for level in levels]
    # 'ij' indexing plus ravel() makes the first level vary slowest, which
    # is the ordering of a Cartesian product.
    grids = np.meshgrid(*positions, indexing='ij')
    flat_positions = [grid.ravel() for grid in grids]
    # Positional arguments: the second one is the integer position of each
    # entry within its level.
    return pd.MultiIndex(levels, flat_positions, sortorder=0, names=names)


def maybe_wrap_array(original, new_array):
"""Wrap a transformed array with __array_wrap__ if it can be done safely.

Expand Down
59 changes: 40 additions & 19 deletions xarray/test/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,6 +993,15 @@ def test_stack_unstack(self):
actual = orig.stack(z=['x', 'y']).unstack('z')
self.assertDataArrayIdentical(orig, actual)

def test_stack_unstack_decreasing_coordinate(self):
    """Stacking then unstacking must return the original array."""
    # regression test for GH980: 'y' is deliberately decreasing (3, 2, 1)
    values = np.random.rand(3, 4)
    coords = {'x': np.arange(4), 'y': np.arange(3, 0, -1)}
    orig = DataArray(values, dims=('y', 'x'), coords=coords)
    actual = orig.stack(allpoints=['y', 'x']).unstack('allpoints')
    self.assertDataArrayIdentical(orig, actual)

def test_unstack_pandas_consistency(self):
df = pd.DataFrame({'foo': range(3),
'x': ['a', 'b', 'b'],
Expand Down Expand Up @@ -1628,15 +1637,15 @@ def test_align_dtype(self):
def test_align_copy(self):
x = DataArray([1, 2, 3], coords=[('a', [1, 2, 3])])
y = DataArray([1, 2], coords=[('a', [3, 1])])

expected_x2 = x
expected_y2 = DataArray([2, np.nan, 1], coords=[('a', [1, 2, 3])])

x2, y2 = align(x, y, join='outer', copy=False)
self.assertDataArrayIdentical(expected_x2, x2)
self.assertDataArrayIdentical(expected_y2, y2)
assert source_ndarray(x2.data) is source_ndarray(x.data)

x2, y2 = align(x, y, join='outer', copy=True)
self.assertDataArrayIdentical(expected_x2, x2)
self.assertDataArrayIdentical(expected_y2, y2)
Expand All @@ -1647,23 +1656,28 @@ def test_align_copy(self):
x2, = align(x, copy=False)
self.assertDataArrayIdentical(x, x2)
assert source_ndarray(x2.data) is source_ndarray(x.data)

x2, = align(x, copy=True)
self.assertDataArrayIdentical(x, x2)
assert source_ndarray(x2.data) is not source_ndarray(x.data)

def test_align_exclude(self):
x = DataArray([[1, 2], [3, 4]], coords=[('a', [-1, -2]), ('b', [3, 4])])
y = DataArray([[1, 2], [3, 4]], coords=[('a', [-1, 20]), ('b', [5, 6])])
x = DataArray([[1, 2], [3, 4]],
coords=[('a', [-1, -2]), ('b', [3, 4])])
y = DataArray([[1, 2], [3, 4]],
coords=[('a', [-1, 20]), ('b', [5, 6])])
z = DataArray([1], dims=['a'], coords={'a': [20], 'b': 7})

x2, y2, z2 = align(x, y, z, join='outer', exclude=['b'])
expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], coords=[('a', [-2, -1, 20]), ('b', [3, 4])])
expected_y2 = DataArray([[np.nan, np.nan], [1, 2], [3, 4]], coords=[('a', [-2, -1, 20]), ('b', [5, 6])])
expected_z2 = DataArray([np.nan, np.nan, 1], dims=['a'], coords={'a': [-2, -1, 20], 'b': 7})
expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]],
coords=[('a', [-2, -1, 20]), ('b', [3, 4])])
expected_y2 = DataArray([[np.nan, np.nan], [1, 2], [3, 4]],
coords=[('a', [-2, -1, 20]), ('b', [5, 6])])
expected_z2 = DataArray([np.nan, np.nan, 1], dims=['a'],
coords={'a': [-2, -1, 20], 'b': 7})
self.assertDataArrayIdentical(expected_x2, x2)
self.assertDataArrayIdentical(expected_y2, y2)
self.assertDataArrayIdentical(expected_z2, z2)
self.assertDataArrayIdentical(expected_z2, z2)

def test_align_indexes(self):
x = DataArray([1, 2, 3], coords=[('a', [-1, 10, -2])])
Expand All @@ -1676,7 +1690,8 @@ def test_align_indexes(self):
self.assertDataArrayIdentical(expected_y2, y2)

x2, = align(x, join='outer', indexes={'a': [-2, 7, 10, -1]})
expected_x2 = DataArray([3, np.nan, 2, 1], coords=[('a', [-2, 7, 10, -1])])
expected_x2 = DataArray([3, np.nan, 2, 1],
coords=[('a', [-2, 7, 10, -1])])
self.assertDataArrayIdentical(expected_x2, x2)

def test_broadcast_arrays(self):
Expand All @@ -1699,10 +1714,13 @@ def test_broadcast_arrays(self):

def test_broadcast_arrays_misaligned(self):
# broadcast on misaligned coords must auto-align
x = DataArray([[1, 2], [3, 4]], coords=[('a', [-1, -2]), ('b', [3, 4])])
x = DataArray([[1, 2], [3, 4]],
coords=[('a', [-1, -2]), ('b', [3, 4])])
y = DataArray([1, 2], coords=[('a', [-1, 20])])
expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], coords=[('a', [-2, -1, 20]), ('b', [3, 4])])
expected_y2 = DataArray([[np.nan, np.nan], [1, 1], [2, 2]], coords=[('a', [-2, -1, 20]), ('b', [3, 4])])
expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]],
coords=[('a', [-2, -1, 20]), ('b', [3, 4])])
expected_y2 = DataArray([[np.nan, np.nan], [1, 1], [2, 2]],
coords=[('a', [-2, -1, 20]), ('b', [3, 4])])
x2, y2 = broadcast(x, y)
self.assertDataArrayIdentical(expected_x2, x2)
self.assertDataArrayIdentical(expected_y2, y2)
Expand All @@ -1718,21 +1736,24 @@ def test_broadcast_arrays_nocopy(self):
self.assertDataArrayIdentical(expected_x2, x2)
self.assertDataArrayIdentical(expected_y2, y2)
assert source_ndarray(x2.data) is source_ndarray(x.data)

# single-element broadcast (trivial case)
x2, = broadcast(x)
self.assertDataArrayIdentical(x, x2)
assert source_ndarray(x2.data) is source_ndarray(x.data)

def test_broadcast_arrays_exclude(self):
x = DataArray([[1, 2], [3, 4]], coords=[('a', [-1, -2]), ('b', [3, 4])])
x = DataArray([[1, 2], [3, 4]],
coords=[('a', [-1, -2]), ('b', [3, 4])])
y = DataArray([1, 2], coords=[('a', [-1, 20])])
z = DataArray(5, coords={'b': 5})

x2, y2, z2 = broadcast(x, y, z, exclude=['b'])
expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]], coords=[('a', [-2, -1, 20]), ('b', [3, 4])])
expected_x2 = DataArray([[3, 4], [1, 2], [np.nan, np.nan]],
coords=[('a', [-2, -1, 20]), ('b', [3, 4])])
expected_y2 = DataArray([np.nan, 1, 2], coords=[('a', [-2, -1, 20])])
expected_z2 = DataArray([5, 5, 5], dims=['a'], coords={'a': [-2, -1, 20], 'b': 5})
expected_z2 = DataArray([5, 5, 5], dims=['a'],
coords={'a': [-2, -1, 20], 'b': 5})
self.assertDataArrayIdentical(expected_x2, x2)
self.assertDataArrayIdentical(expected_y2, y2)
self.assertDataArrayIdentical(expected_z2, z2)
Expand Down
11 changes: 11 additions & 0 deletions xarray/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ def test(self):
self.assertEqual(expected.dtype, actual.dtype)


def test_multiindex_from_product_levels():
    """Check that levels keep their given order and values match pandas."""
    levels = [['b', 'a'], [1, 3, 2]]
    result = utils.multiindex_from_product_levels(levels)

    # Levels are neither sorted nor refactorized, so the integer positions
    # simply count through each level in product order.
    expected_labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
    np.testing.assert_array_equal(result.labels, expected_labels)
    np.testing.assert_array_equal(result.levels[0], ['b', 'a'])
    np.testing.assert_array_equal(result.levels[1], [1, 3, 2])

    # The resulting tuples must agree with pandas' own product constructor.
    other = pd.MultiIndex.from_product([['b', 'a'], [1, 3, 2]])
    np.testing.assert_array_equal(result.values, other.values)


class TestArrayEquiv(TestCase):
def test_0d(self):
# verify our work around for pd.isnull not working for 0-dimensional
Expand Down