Skip to content

Commit c9a0405

Browse files
TomAugspurger authored and jreback committed
Fixed construction / factorization of empty PA and IA (#24599)
1 parent 5ba4337 commit c9a0405

File tree

7 files changed

+37
-2
lines changed

7 files changed

+37
-2
lines changed

pandas/core/arrays/interval.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,11 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
217217

218218
@classmethod
219219
def _from_factorized(cls, values, original):
220+
if len(values) == 0:
221+
# An empty array returns object-dtype here. We can't create
222+
# a new IA from an (empty) object-dtype array, so turn it into the
223+
# correct dtype.
224+
values = values.astype(original.dtype.subtype)
220225
return cls(values, closed=original.closed)
221226

222227
_interval_shared_docs['from_breaks'] = """

pandas/core/arrays/period.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -189,6 +189,13 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
189189
freq = dtype.freq
190190
else:
191191
freq = None
192+
193+
if isinstance(scalars, cls):
194+
validate_dtype_freq(scalars.dtype, freq)
195+
if copy:
196+
scalars = scalars.copy()
197+
return scalars
198+
192199
periods = np.asarray(scalars, dtype=object)
193200
if copy:
194201
periods = periods.copy()

pandas/tests/arrays/test_period.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -225,8 +225,7 @@ def test_sub_period():
225225
def test_where_different_freq_raises(other):
226226
ser = pd.Series(period_array(['2000', '2001', '2002'], freq='D'))
227227
cond = np.array([True, False, True])
228-
with pytest.raises(IncompatibleFrequency,
229-
match="Input has different freq=H"):
228+
with pytest.raises(IncompatibleFrequency, match="freq"):
230229
ser.where(cond, other)
231230

232231

pandas/tests/extension/arrow/test_bool.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,11 @@ class TestConstructors(BaseArrowTests, base.BaseConstructorsTests):
4444
def test_from_dtype(self, data):
4545
pytest.skip("GH-22666")
4646

47+
# seems like some bug in isna on empty BoolArray returning floats.
48+
@pytest.mark.xfail(reason='bad is-na for empty data')
49+
def test_from_sequence_from_cls(self, data):
50+
super(TestConstructors, self).test_from_sequence_from_cls(data)
51+
4752

4853
class TestReduce(base.BaseNoReduceTests):
4954
def test_reduce_series_boolean(self):

pandas/tests/extension/base/constructors.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,14 @@
99

1010
class BaseConstructorsTests(BaseExtensionTests):
1111

12+
def test_from_sequence_from_cls(self, data):
13+
result = type(data)._from_sequence(data, dtype=data.dtype)
14+
self.assert_extension_array_equal(result, data)
15+
16+
data = data[:0]
17+
result = type(data)._from_sequence(data, dtype=data.dtype)
18+
self.assert_extension_array_equal(result, data)
19+
1220
def test_array_from_scalars(self, data):
1321
scalars = [data[0], data[1], data[2]]
1422
result = data._from_sequence(scalars)

pandas/tests/extension/base/methods.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,14 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
105105
tm.assert_numpy_array_equal(l1, l2)
106106
self.assert_extension_array_equal(u1, u2)
107107

108+
def test_factorize_empty(self, data):
109+
labels, uniques = pd.factorize(data[:0])
110+
expected_labels = np.array([], dtype=np.intp)
111+
expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype)
112+
113+
tm.assert_numpy_array_equal(labels, expected_labels)
114+
self.assert_extension_array_equal(uniques, expected_uniques)
115+
108116
def test_fillna_copy_frame(self, data_missing):
109117
arr = data_missing.take([1, 1])
110118
df = pd.DataFrame({"A": arr})

pandas/tests/extension/json/array.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,9 @@ def _concat_same_type(cls, to_concat):
179179

180180
def _values_for_factorize(self):
181181
frozen = self._values_for_argsort()
182+
if len(frozen) == 0:
183+
# _factorize_array expects 1-d array, this is a len-0 2-d array.
184+
frozen = frozen.ravel()
182185
return frozen, ()
183186

184187
def _values_for_argsort(self):

0 commit comments

Comments
 (0)