pandas-dev · mroeschke · Apr 11, 2023 · Apr 11, 2023 · Apr 11, 2023 · Apr 11, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -390,6 +390,8 @@ Other
 - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`)
 - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`)
 - Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`)
+- Bug in :meth:`DataFrame.reindex` where a ``fill_value`` that should be inferred as an :class:`ExtensionDtype` was incorrectly inferred as ``object`` dtype (:issue:`52586`)
+-
 
 .. ***DO NOT USE THIS SECTION***
 

diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
@@ -4,7 +4,6 @@
 from typing import (
     TYPE_CHECKING,
     Sequence,
-    cast,
 )
 
 import numpy as np
@@ -29,20 +28,14 @@
     needs_i8_conversion,
 )
 from pandas.core.dtypes.concat import concat_compat
-from pandas.core.dtypes.dtypes import (
-    DatetimeTZDtype,
-    ExtensionDtype,
-)
+from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.missing import (
     is_valid_na_for_dtype,
     isna,
     isna_all,
 )
 
-from pandas.core.arrays import (
-    DatetimeArray,
-    ExtensionArray,
-)
+from pandas.core.arrays import ExtensionArray
 from pandas.core.arrays.sparse import SparseDtype
 from pandas.core.construction import ensure_wrapped_if_datetimelike
 from pandas.core.internals.array_manager import (
@@ -53,7 +46,10 @@
     ensure_block_shape,
     new_block_2d,
 )
-from pandas.core.internals.managers import BlockManager
+from pandas.core.internals.managers import (
+    BlockManager,
+    make_na_array,
+)
 
 if TYPE_CHECKING:
     from pandas._typing import (
@@ -474,38 +470,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
                     if len(values) and values[0] is None:
                         fill_value = None
 
-                if isinstance(empty_dtype, DatetimeTZDtype):
-                    # NB: exclude e.g. pyarrow[dt64tz] dtypes
-                    i8values = np.full(self.block.shape, fill_value._value)
-                    return DatetimeArray(i8values, dtype=empty_dtype)
-
-                elif is_1d_only_ea_dtype(empty_dtype):
-                    empty_dtype = cast(ExtensionDtype, empty_dtype)
-                    cls = empty_dtype.construct_array_type()
-
-                    missing_arr = cls._from_sequence([], dtype=empty_dtype)
-                    ncols, nrows = self.block.shape
-                    assert ncols == 1, ncols
-                    empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
-                    return missing_arr.take(
-                        empty_arr, allow_fill=True, fill_value=fill_value
-                    )
-                elif isinstance(empty_dtype, ExtensionDtype):
-                    # TODO: no tests get here, a handful would if we disabled
-                    #  the dt64tz special-case above (which is faster)
-                    cls = empty_dtype.construct_array_type()
-                    missing_arr = cls._empty(shape=self.block.shape, dtype=empty_dtype)
-                    missing_arr[:] = fill_value
-                    return missing_arr
-                else:
-                    # NB: we should never get here with empty_dtype integer or bool;
-                    #  if we did, the missing_arr.fill would cast to gibberish
-                    missing_arr = np.empty(self.block.shape, dtype=empty_dtype)
-                    missing_arr.fill(fill_value)
-
-                    if empty_dtype.kind in "mM":
-                        missing_arr = ensure_wrapped_if_datetimelike(missing_arr)
-                    return missing_arr
+                return make_na_array(empty_dtype, self.block.shape, fill_value)
 
             if not self.block._can_consolidate:
                 # preserve these for validation in concat_compat

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -38,7 +38,10 @@
     is_dtype_equal,
     is_list_like,
 )
-from pandas.core.dtypes.dtypes import ExtensionDtype
+from pandas.core.dtypes.dtypes import (
+    DatetimeTZDtype,
+    ExtensionDtype,
+)
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCSeries,
@@ -49,6 +52,7 @@
 )
 
 import pandas.core.algorithms as algos
+from pandas.core.arrays import DatetimeArray
 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
 from pandas.core.arrays.sparse import SparseDtype
 import pandas.core.common as com
@@ -915,16 +919,11 @@ def _make_na_block(
 
         if fill_value is None:
             fill_value = np.nan
-        block_shape = list(self.shape)
-        block_shape[0] = len(placement)
-
-        dtype, fill_value = infer_dtype_from_scalar(fill_value)
-        # error: Argument "dtype" to "empty" has incompatible type "Union[dtype,
-        # ExtensionDtype]"; expected "Union[dtype, None, type, _SupportsDtype, str,
-        # Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
-        # Tuple[Any, Any]]"
-        block_values = np.empty(block_shape, dtype=dtype)  # type: ignore[arg-type]
-        block_values.fill(fill_value)
+
+        shape = (len(placement), self.shape[1])
+
+        dtype, fill_value = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
+        block_values = make_na_array(dtype, shape, fill_value)
         return new_block_2d(block_values, placement=placement)
 
     def take(
@@ -2359,3 +2358,60 @@ def _preprocess_slice_or_indexer(
         if not allow_fill:
             indexer = maybe_convert_indices(indexer, length)
         return "fancy", indexer, len(indexer)
+
+
+def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike:
+    """
+    Construct an array of ``dtype`` with ``shape``, filled with ``fill_value``.
+
+    Parameters
+    ----------
+    dtype : np.dtype or ExtensionDtype
+        Dtype of the array to build.
+    shape : tuple of int
+        Requested shape. For 1D-only extension dtypes this must be
+        ``(1, nrows)`` and the result is 1-dimensional with ``nrows`` entries.
+    fill_value : scalar
+        Value every entry is set to.
+
+    Returns
+    -------
+    np.ndarray or ExtensionArray
+    """
+    if isinstance(dtype, DatetimeTZDtype):
+        # NB: exclude e.g. pyarrow[dt64tz] dtypes
+        from pandas._libs.tslibs import Timestamp
+
+        # ``_value`` is expressed in the scalar's own resolution, so cast to
+        #  the dtype's unit before taking it; this also maps None/NaN to NaT.
+        ts = Timestamp(fill_value).as_unit(dtype.unit)
+        i8values = np.full(shape, ts._value)
+        return DatetimeArray(i8values, dtype=dtype)
+
+    elif is_1d_only_ea_dtype(dtype):
+        dtype = cast(ExtensionDtype, dtype)
+        cls = dtype.construct_array_type()
+
+        missing_arr = cls._from_sequence([], dtype=dtype)
+        ncols, nrows = shape
+        assert ncols == 1, ncols
+        # Taking -1 everywhere with allow_fill=True yields nrows copies of
+        #  fill_value.
+        empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
+        return missing_arr.take(empty_arr, allow_fill=True, fill_value=fill_value)
+    elif isinstance(dtype, ExtensionDtype):
+        # TODO: no tests get here, a handful would if we disabled
+        #  the dt64tz special-case above (which is faster)
+        cls = dtype.construct_array_type()
+        missing_arr = cls._empty(shape=shape, dtype=dtype)
+        missing_arr[:] = fill_value
+        return missing_arr
+    else:
+        # NB: we should never get here with dtype integer or bool;
+        #  if we did, the missing_arr.fill would cast to gibberish
+        missing_arr = np.empty(shape, dtype=dtype)
+        missing_arr.fill(fill_value)
+
+        if dtype.kind in "mM":
+            missing_arr = ensure_wrapped_if_datetimelike(missing_arr)
+        return missing_arr
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
@@ -119,6 +119,29 @@ class TestDataFrameSelectReindex:
     # These are specific reindex-based tests; other indexing tests should go in
     # test_indexing
 
+    @td.skip_array_manager_not_yet_implemented
+    def test_reindex_tzaware_fill_value(self):
+        # GH#52586 fill_value scalars backed by an ExtensionDtype must not give object
+        df = DataFrame([[1]])
+
+        ts = pd.Timestamp("2023-04-10 17:32", tz="US/Pacific")
+        res = df.reindex([0, 1], axis=1, fill_value=ts)
+        assert res.dtypes[1] == pd.DatetimeTZDtype(tz="US/Pacific")
+        expected = DataFrame({0: [1], 1: [ts]})
+        tm.assert_frame_equal(res, expected)
+
+        per = ts.tz_localize(None).to_period("s")  # Period fill_value
+        res = df.reindex([0, 1], axis=1, fill_value=per)
+        assert res.dtypes[1] == pd.PeriodDtype("s")
+        expected = DataFrame({0: [1], 1: [per]})
+        tm.assert_frame_equal(res, expected)
+
+        interval = pd.Interval(ts, ts + pd.Timedelta(seconds=1))  # Interval fill_value
+        res = df.reindex([0, 1], axis=1, fill_value=interval)
+        assert res.dtypes[1] == pd.IntervalDtype("datetime64[ns, US/Pacific]", "right")
+        expected = DataFrame({0: [1], 1: [interval]})
+        tm.assert_frame_equal(res, expected)
+
     def test_reindex_copies(self):
         # based on asv time_reindex_axis1
         N = 10