Skip to content

Handle non-numpy dtypes without erroring #717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 13, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ Bug fixes

- ``DataArray.to_masked_array`` always returns masked array with mask being an array
(not a scalar value) (:issue:`684`)
- You can now pass pandas objects with non-numpy dtypes (e.g., ``categorical``
or ``datetime64`` with a timezone) into xray without an error
(:issue:`716`).

v0.6.2 (unreleased)
-------------------
Expand Down
6 changes: 5 additions & 1 deletion xray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,10 @@ def __init__(self, array, dtype=None):
# if a PeriodIndex, force an object dtype
if isinstance(array, pd.PeriodIndex):
dtype = np.dtype('O')
elif hasattr(array, 'categories'):
dtype = array.categories.dtype
elif not utils.is_valid_numpy_dtype(array.dtype):
dtype = np.dtype('O')
else:
dtype = array.dtype
self._dtype = dtype
Expand All @@ -387,7 +391,7 @@ def __array__(self, dtype=None):
with suppress(AttributeError):
# this might not be public API
array = array.asobject
return array.values.astype(dtype)
return np.asarray(array, dtype)

def __getitem__(self, key):
if isinstance(key, tuple) and len(key) == 1:
Expand Down
9 changes: 9 additions & 0 deletions xray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,15 @@ def is_scalar(value):
value is None)


def is_valid_numpy_dtype(dtype):
try:
np.dtype(dtype)
except (TypeError, ValueError):
return False
else:
return True


def dict_equiv(first, second, compat=equivalent):
"""Test equivalence of two dict-like objects. If any of the values are
numpy arrays, compare them correctly.
Expand Down
31 changes: 21 additions & 10 deletions xray/test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from distutils.version import LooseVersion
import numpy as np
import pytz
import pandas as pd

from xray import Variable, Dataset, DataArray
Expand Down Expand Up @@ -153,7 +154,7 @@ def test_0d_time_data(self):
def test_datetime64_conversion(self):
times = pd.date_range('2000-01-01', periods=3)
for values, preserve_source in [
(times, False),
(times, True),
(times.values, True),
(times.values.astype('datetime64[s]'), False),
(times.to_pydatetime(), False),
Expand All @@ -163,15 +164,12 @@ def test_datetime64_conversion(self):
self.assertArrayEqual(v.values, times.values)
self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
same_source = source_ndarray(v.values) is source_ndarray(values)
if preserve_source and self.cls is Variable:
self.assertTrue(same_source)
else:
self.assertFalse(same_source)
assert preserve_source == same_source

def test_timedelta64_conversion(self):
times = pd.timedelta_range(start=0, periods=3)
for values, preserve_source in [
(times, False),
(times, True),
(times.values, True),
(times.values.astype('timedelta64[s]'), False),
(times.to_pytimedelta(), False),
Expand All @@ -181,10 +179,7 @@ def test_timedelta64_conversion(self):
self.assertArrayEqual(v.values, times.values)
self.assertEqual(v.values.dtype, np.dtype('timedelta64[ns]'))
same_source = source_ndarray(v.values) is source_ndarray(values)
if preserve_source and self.cls is Variable:
self.assertTrue(same_source)
else:
self.assertFalse(same_source)
assert preserve_source == same_source

def test_object_conversion(self):
data = np.arange(5).astype(str).astype(object)
Expand Down Expand Up @@ -405,6 +400,22 @@ def test_aggregate_complex(self):
expected = Variable((), 0.5 + 1j)
self.assertVariableAllClose(v.mean(), expected)

def test_pandas_cateogrical_dtype(self):
data = pd.Categorical(np.arange(10, dtype='int64'))
v = self.cls('x', data)
print(v) # should not error
assert v.dtype == 'int64'

def test_pandas_datetime64_with_tz(self):
data = pd.date_range(start='2000-01-01',
tz=pytz.timezone('America/New_York'),
periods=10, freq='1h')
v = self.cls('x', data)
print(v) # should not error
if 'America/New_York' in str(data.dtype):
# pandas is new enough that it has datetime64 with timezone dtype
assert v.dtype == 'object'


class TestVariable(TestCase, VariableSubclassTestCases):
cls = staticmethod(Variable)
Expand Down