From 93a6db0b919ccff776d66179c6935d6a0cd0aa96 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 3 Jan 2019 13:10:50 -0600 Subject: [PATCH 1/2] Fixed construction of empty PA and IA Closes https://github.com/pandas-dev/pandas/issues/23933 --- pandas/core/arrays/interval.py | 5 +++++ pandas/core/arrays/period.py | 7 +++++++ pandas/tests/extension/arrow/test_bool.py | 5 +++++ pandas/tests/extension/base/constructors.py | 8 ++++++++ pandas/tests/extension/base/methods.py | 8 ++++++++ pandas/tests/extension/json/array.py | 3 +++ 6 files changed, 36 insertions(+) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 0e3c59120415d..2e7216108a23e 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -217,6 +217,11 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): @classmethod def _from_factorized(cls, values, original): + if len(values) == 0: + # An empty array returns object-dtype here. We can't create + # a new IA from an (empty) object-dtype array, so turn it into the + # correct dtype. + values = values.astype(original.dtype.subtype) return cls(values, closed=original.closed) _interval_shared_docs['from_breaks'] = """ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 70da02f2ba0a1..6e3dc6f789cc9 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -189,6 +189,13 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): freq = dtype.freq else: freq = None + + if isinstance(scalars, cls): + validate_dtype_freq(scalars.dtype, freq) + if copy: + scalars = scalars.copy() + return scalars + periods = np.asarray(scalars, dtype=object) if copy: periods = periods.copy() diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index f259e66e6cc76..2ace0fadc73e9 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -44,6 +44,11 @@ class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): def test_from_dtype(self, data): pytest.skip("GH-22666") + # seems like some bug in isna on empty BoolArray returning floats. + @pytest.mark.xfail(reason='bad is-na for empty data') + def test_from_sequence_from_cls(self, data): + super(TestConstructors, self).test_from_sequence_from_cls(data) + class TestReduce(base.BaseNoReduceTests): def test_reduce_series_boolean(self): diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 9c719b1304629..231a1f648f8e8 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -9,6 +9,14 @@ class BaseConstructorsTests(BaseExtensionTests): + def test_from_sequence_from_cls(self, data): + result = type(data)._from_sequence(data, dtype=data.dtype) + self.assert_extension_array_equal(result, data) + + data = data[:0] + result = type(data)._from_sequence(data, dtype=data.dtype) + self.assert_extension_array_equal(result, data) + def test_array_from_scalars(self, data): scalars = [data[0], data[1], data[2]] result = data._from_sequence(scalars) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 2c04c4cd99801..f64df7a84b7c0 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -105,6 +105,14 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel): tm.assert_numpy_array_equal(l1, l2) self.assert_extension_array_equal(u1, u2) + def test_factorize_empty(self, data): + labels, uniques = pd.factorize(data[:0]) + expected_labels = np.array([], dtype=np.intp) + expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype) + + tm.assert_numpy_array_equal(labels, expected_labels) + self.assert_extension_array_equal(uniques, expected_uniques) + def test_fillna_copy_frame(self, data_missing): arr = data_missing.take([1, 1]) df = pd.DataFrame({"A": arr}) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index bd50584406312..10fd21f89c564 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -179,6 +179,9 @@ def _concat_same_type(cls, to_concat): def _values_for_factorize(self): frozen = self._values_for_argsort() + if len(frozen) == 0: + # _factorize_array expects 1-d array, this is a len-0 2-d array. + frozen = frozen.ravel() return frozen, () def _values_for_argsort(self): From 572f1d0d90b694cfa4923581ced6c72368bd3fd0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 3 Jan 2019 14:30:49 -0600 Subject: [PATCH 2/2] update where message --- pandas/tests/arrays/test_period.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 387eaa5223bbe..affe3b3854490 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -225,8 +225,7 @@ def test_sub_period(): def test_where_different_freq_raises(other): ser = pd.Series(period_array(['2000', '2001', '2002'], freq='D')) cond = np.array([True, False, True]) - with pytest.raises(IncompatibleFrequency, - match="Input has different freq=H"): + with pytest.raises(IncompatibleFrequency, match="freq"): ser.where(cond, other)