Skip to content

Commit f70638f

Browse files
authored
BUG: reindex with fill_value that should give EA dtype (#52586)
* BUG: reindex with fill_value that should give EA dtype * GH ref * typo fixup * xfail on ArrayManager
1 parent fb92f38 commit f70638f

File tree

4 files changed

+75
-53
lines changed

4 files changed

+75
-53
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,8 @@ Other
391391
- Bug in :meth:`Series.memory_usage` when ``deep=True`` throws an error with Series of objects and the returned value is incorrect, as it did not take into account GC corrections (:issue:`51858`)
392392
- Bug in :func:`assert_frame_equal` checking category dtypes even when asked not to check index type (:issue:`52126`)
393393
- Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`)
394+
- Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred as an :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`)
395+
-
394396

395397
.. ***DO NOT USE THIS SECTION***
396398

pandas/core/internals/concat.py

Lines changed: 7 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from typing import (
55
TYPE_CHECKING,
66
Sequence,
7-
cast,
87
)
98

109
import numpy as np
@@ -29,20 +28,14 @@
2928
needs_i8_conversion,
3029
)
3130
from pandas.core.dtypes.concat import concat_compat
32-
from pandas.core.dtypes.dtypes import (
33-
DatetimeTZDtype,
34-
ExtensionDtype,
35-
)
31+
from pandas.core.dtypes.dtypes import ExtensionDtype
3632
from pandas.core.dtypes.missing import (
3733
is_valid_na_for_dtype,
3834
isna,
3935
isna_all,
4036
)
4137

42-
from pandas.core.arrays import (
43-
DatetimeArray,
44-
ExtensionArray,
45-
)
38+
from pandas.core.arrays import ExtensionArray
4639
from pandas.core.arrays.sparse import SparseDtype
4740
from pandas.core.construction import ensure_wrapped_if_datetimelike
4841
from pandas.core.internals.array_manager import (
@@ -53,7 +46,10 @@
5346
ensure_block_shape,
5447
new_block_2d,
5548
)
56-
from pandas.core.internals.managers import BlockManager
49+
from pandas.core.internals.managers import (
50+
BlockManager,
51+
make_na_array,
52+
)
5753

5854
if TYPE_CHECKING:
5955
from pandas._typing import (
@@ -474,38 +470,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
474470
if len(values) and values[0] is None:
475471
fill_value = None
476472

477-
if isinstance(empty_dtype, DatetimeTZDtype):
478-
# NB: exclude e.g. pyarrow[dt64tz] dtypes
479-
i8values = np.full(self.block.shape, fill_value._value)
480-
return DatetimeArray(i8values, dtype=empty_dtype)
481-
482-
elif is_1d_only_ea_dtype(empty_dtype):
483-
empty_dtype = cast(ExtensionDtype, empty_dtype)
484-
cls = empty_dtype.construct_array_type()
485-
486-
missing_arr = cls._from_sequence([], dtype=empty_dtype)
487-
ncols, nrows = self.block.shape
488-
assert ncols == 1, ncols
489-
empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
490-
return missing_arr.take(
491-
empty_arr, allow_fill=True, fill_value=fill_value
492-
)
493-
elif isinstance(empty_dtype, ExtensionDtype):
494-
# TODO: no tests get here, a handful would if we disabled
495-
# the dt64tz special-case above (which is faster)
496-
cls = empty_dtype.construct_array_type()
497-
missing_arr = cls._empty(shape=self.block.shape, dtype=empty_dtype)
498-
missing_arr[:] = fill_value
499-
return missing_arr
500-
else:
501-
# NB: we should never get here with empty_dtype integer or bool;
502-
# if we did, the missing_arr.fill would cast to gibberish
503-
missing_arr = np.empty(self.block.shape, dtype=empty_dtype)
504-
missing_arr.fill(fill_value)
505-
506-
if empty_dtype.kind in "mM":
507-
missing_arr = ensure_wrapped_if_datetimelike(missing_arr)
508-
return missing_arr
473+
return make_na_array(empty_dtype, self.block.shape, fill_value)
509474

510475
if not self.block._can_consolidate:
511476
# preserve these for validation in concat_compat

pandas/core/internals/managers.py

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@
3838
is_dtype_equal,
3939
is_list_like,
4040
)
41-
from pandas.core.dtypes.dtypes import ExtensionDtype
41+
from pandas.core.dtypes.dtypes import (
42+
DatetimeTZDtype,
43+
ExtensionDtype,
44+
)
4245
from pandas.core.dtypes.generic import (
4346
ABCDataFrame,
4447
ABCSeries,
@@ -49,6 +52,7 @@
4952
)
5053

5154
import pandas.core.algorithms as algos
55+
from pandas.core.arrays import DatetimeArray
5256
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
5357
from pandas.core.arrays.sparse import SparseDtype
5458
import pandas.core.common as com
@@ -915,16 +919,11 @@ def _make_na_block(
915919

916920
if fill_value is None:
917921
fill_value = np.nan
918-
block_shape = list(self.shape)
919-
block_shape[0] = len(placement)
920-
921-
dtype, fill_value = infer_dtype_from_scalar(fill_value)
922-
# error: Argument "dtype" to "empty" has incompatible type "Union[dtype,
923-
# ExtensionDtype]"; expected "Union[dtype, None, type, _SupportsDtype, str,
924-
# Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
925-
# Tuple[Any, Any]]"
926-
block_values = np.empty(block_shape, dtype=dtype) # type: ignore[arg-type]
927-
block_values.fill(fill_value)
922+
923+
shape = (len(placement), self.shape[1])
924+
925+
dtype, fill_value = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
926+
block_values = make_na_array(dtype, shape, fill_value)
928927
return new_block_2d(block_values, placement=placement)
929928

930929
def take(
@@ -2359,3 +2358,36 @@ def _preprocess_slice_or_indexer(
23592358
if not allow_fill:
23602359
indexer = maybe_convert_indices(indexer, length)
23612360
return "fancy", indexer, len(indexer)
2361+
2362+
2363+
def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike:
2364+
if isinstance(dtype, DatetimeTZDtype):
2365+
# NB: exclude e.g. pyarrow[dt64tz] dtypes
2366+
i8values = np.full(shape, fill_value._value)
2367+
return DatetimeArray(i8values, dtype=dtype)
2368+
2369+
elif is_1d_only_ea_dtype(dtype):
2370+
dtype = cast(ExtensionDtype, dtype)
2371+
cls = dtype.construct_array_type()
2372+
2373+
missing_arr = cls._from_sequence([], dtype=dtype)
2374+
ncols, nrows = shape
2375+
assert ncols == 1, ncols
2376+
empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
2377+
return missing_arr.take(empty_arr, allow_fill=True, fill_value=fill_value)
2378+
elif isinstance(dtype, ExtensionDtype):
2379+
# TODO: no tests get here, a handful would if we disabled
2380+
# the dt64tz special-case above (which is faster)
2381+
cls = dtype.construct_array_type()
2382+
missing_arr = cls._empty(shape=shape, dtype=dtype)
2383+
missing_arr[:] = fill_value
2384+
return missing_arr
2385+
else:
2386+
# NB: we should never get here with dtype integer or bool;
2387+
# if we did, the missing_arr.fill would cast to gibberish
2388+
missing_arr = np.empty(shape, dtype=dtype)
2389+
missing_arr.fill(fill_value)
2390+
2391+
if dtype.kind in "mM":
2392+
missing_arr = ensure_wrapped_if_datetimelike(missing_arr)
2393+
return missing_arr

pandas/tests/frame/methods/test_reindex.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,29 @@ class TestDataFrameSelectReindex:
119119
# These are specific reindex-based tests; other indexing tests should go in
120120
# test_indexing
121121

122+
@td.skip_array_manager_not_yet_implemented
123+
def test_reindex_tzaware_fill_value(self):
124+
# GH#52586
125+
df = DataFrame([[1]])
126+
127+
ts = pd.Timestamp("2023-04-10 17:32", tz="US/Pacific")
128+
res = df.reindex([0, 1], axis=1, fill_value=ts)
129+
assert res.dtypes[1] == pd.DatetimeTZDtype(tz="US/Pacific")
130+
expected = DataFrame({0: [1], 1: [ts]})
131+
tm.assert_frame_equal(res, expected)
132+
133+
per = ts.tz_localize(None).to_period("s")
134+
res = df.reindex([0, 1], axis=1, fill_value=per)
135+
assert res.dtypes[1] == pd.PeriodDtype("s")
136+
expected = DataFrame({0: [1], 1: [per]})
137+
tm.assert_frame_equal(res, expected)
138+
139+
interval = pd.Interval(ts, ts + pd.Timedelta(seconds=1))
140+
res = df.reindex([0, 1], axis=1, fill_value=interval)
141+
assert res.dtypes[1] == pd.IntervalDtype("datetime64[ns, US/Pacific]", "right")
142+
expected = DataFrame({0: [1], 1: [interval]})
143+
tm.assert_frame_equal(res, expected)
144+
122145
def test_reindex_copies(self):
123146
# based on asv time_reindex_axis1
124147
N = 10

0 commit comments

Comments
 (0)