Skip to content

Commit 8864319

Browse files
authored
DEPR: Deprecate dtype inference on pandas objects (#56244)
1 parent 2a9c3d7 commit 8864319

File tree

16 files changed

+141
-26
lines changed

16 files changed

+141
-26
lines changed

doc/source/whatsnew/v2.2.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,7 @@ Other Deprecations
555555
- Deprecated behavior of :meth:`Index.insert` with an object-dtype index silently performing type inference on the result; explicitly call ``result.infer_objects(copy=False)`` for the old behavior instead (:issue:`51363`)
556556
- Deprecated casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
557557
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
558+
- Deprecated dtype inference in the :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object as input; call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`)
558559
- Deprecated dtype inference when setting an :class:`Index` into a :class:`DataFrame`; cast explicitly instead (:issue:`56102`)
559560
- Deprecated including the groups in computations when using :meth:`.DataFrameGroupBy.apply` and :meth:`.DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
560561
- Deprecated indexing an :class:`Index` with a boolean indexer of length zero (:issue:`55820`)

pandas/_testing/__init__.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
ContextManager,
1111
cast,
1212
)
13+
import warnings
1314

1415
import numpy as np
1516

@@ -285,11 +286,17 @@ def box_expected(expected, box_cls, transpose: bool = True):
285286
else:
286287
expected = pd.array(expected, copy=False)
287288
elif box_cls is Index:
288-
expected = Index(expected)
289+
with warnings.catch_warnings():
290+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
291+
expected = Index(expected)
289292
elif box_cls is Series:
290-
expected = Series(expected)
293+
with warnings.catch_warnings():
294+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
295+
expected = Series(expected)
291296
elif box_cls is DataFrame:
292-
expected = Series(expected).to_frame()
297+
with warnings.catch_warnings():
298+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
299+
expected = Series(expected).to_frame()
293300
if transpose:
294301
# for vector operations, we need a DataFrame to be a single-row,
295302
# not a single-column, in order to operate against non-DataFrame

pandas/core/frame.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,10 @@ def __init__(
722722

723723
manager = _get_option("mode.data_manager", silent=True)
724724

725+
is_pandas_object = isinstance(data, (Series, Index, ExtensionArray))
726+
data_dtype = getattr(data, "dtype", None)
727+
original_dtype = dtype
728+
725729
# GH47215
726730
if isinstance(index, set):
727731
raise ValueError("index cannot be a set")
@@ -908,6 +912,18 @@ def __init__(
908912

909913
NDFrame.__init__(self, mgr)
910914

915+
if original_dtype is None and is_pandas_object and data_dtype == np.object_:
916+
if self.dtypes.iloc[0] != data_dtype:
917+
warnings.warn(
918+
"Dtype inference on a pandas object "
919+
"(Series, Index, ExtensionArray) is deprecated. The DataFrame "
920+
"constructor will keep the original dtype in the future. "
921+
"Call `infer_objects` on the result to get the old "
922+
"behavior.",
923+
FutureWarning,
924+
stacklevel=2,
925+
)
926+
911927
# ----------------------------------------------------------------------
912928

913929
def __dataframe__(

pandas/core/indexes/base.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,8 @@ def __new__(
493493
if not copy and isinstance(data, (ABCSeries, Index)):
494494
refs = data._references
495495

496+
is_pandas_object = isinstance(data, (ABCSeries, Index, ExtensionArray))
497+
496498
# range
497499
if isinstance(data, (range, RangeIndex)):
498500
result = RangeIndex(start=data, copy=copy, name=name)
@@ -572,7 +574,19 @@ def __new__(
572574
klass = cls._dtype_to_subclass(arr.dtype)
573575

574576
arr = klass._ensure_array(arr, arr.dtype, copy=False)
575-
return klass._simple_new(arr, name, refs=refs)
577+
result = klass._simple_new(arr, name, refs=refs)
578+
if dtype is None and is_pandas_object and data_dtype == np.object_:
579+
if result.dtype != data_dtype:
580+
warnings.warn(
581+
"Dtype inference on a pandas object "
582+
"(Series, Index, ExtensionArray) is deprecated. The Index "
583+
"constructor will keep the original dtype in the future. "
584+
"Call `infer_objects` on the result to get the old "
585+
"behavior.",
586+
FutureWarning,
587+
stacklevel=2,
588+
)
589+
return result # type: ignore[return-value]
576590

577591
@classmethod
578592
def _ensure_array(cls, data, dtype, copy: bool):

pandas/core/series.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,10 @@ def __init__(
424424
self.name = name
425425
return
426426

427+
is_pandas_object = isinstance(data, (Series, Index, ExtensionArray))
428+
data_dtype = getattr(data, "dtype", None)
429+
original_dtype = dtype
430+
427431
if isinstance(data, (ExtensionArray, np.ndarray)):
428432
if copy is not False and using_copy_on_write():
429433
if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
@@ -581,6 +585,17 @@ def __init__(
581585
self.name = name
582586
self._set_axis(0, index)
583587

588+
if original_dtype is None and is_pandas_object and data_dtype == np.object_:
589+
if self.dtype != data_dtype:
590+
warnings.warn(
591+
"Dtype inference on a pandas object "
592+
"(Series, Index, ExtensionArray) is deprecated. The Series "
593+
"constructor will keep the original dtype in the future. "
594+
"Call `infer_objects` on the result to get the old behavior.",
595+
FutureWarning,
596+
stacklevel=find_stack_level(),
597+
)
598+
584599
def _init_dict(
585600
self, data, index: Index | None = None, dtype: DtypeObj | None = None
586601
):

pandas/core/strings/accessor.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -689,19 +689,18 @@ def cat(
689689
result = cat_safe(all_cols, sep)
690690

691691
out: Index | Series
692+
if isinstance(self._orig.dtype, CategoricalDtype):
693+
# We need to infer the new categories.
694+
dtype = self._orig.dtype.categories.dtype
695+
else:
696+
dtype = self._orig.dtype
692697
if isinstance(self._orig, ABCIndex):
693698
# add dtype for case that result is all-NA
694-
dtype = None
695699
if isna(result).all():
696-
dtype = object
700+
dtype = object # type: ignore[assignment]
697701

698702
out = Index(result, dtype=dtype, name=self._orig.name)
699703
else: # Series
700-
if isinstance(self._orig.dtype, CategoricalDtype):
701-
# We need to infer the new categories.
702-
dtype = self._orig.dtype.categories.dtype # type: ignore[assignment]
703-
else:
704-
dtype = self._orig.dtype
705704
res_ser = Series(
706705
result, dtype=dtype, index=data.index, name=self._orig.name, copy=False
707706
)

pandas/tests/copy_view/test_constructors.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,8 @@ def test_dataframe_from_series_or_index_different_dtype(using_copy_on_write, con
314314

315315
def test_dataframe_from_series_infer_datetime(using_copy_on_write):
316316
ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object)
317-
df = DataFrame(ser)
317+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
318+
df = DataFrame(ser)
318319
assert not np.shares_memory(get_array(ser), get_array(df, 0))
319320
if using_copy_on_write:
320321
assert df._mgr._has_no_reference(0)

pandas/tests/frame/test_constructors.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,6 +2768,23 @@ def test_frame_string_inference_block_dim(self):
27682768
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
27692769
assert df._mgr.blocks[0].ndim == 2
27702770

2771+
def test_inference_on_pandas_objects(self):
2772+
# GH#56012
2773+
idx = Index([Timestamp("2019-12-31")], dtype=object)
2774+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
2775+
result = DataFrame(idx, columns=["a"])
2776+
assert result.dtypes.iloc[0] != np.object_
2777+
result = DataFrame({"a": idx})
2778+
assert result.dtypes.iloc[0] == np.object_
2779+
2780+
ser = Series([Timestamp("2019-12-31")], dtype=object)
2781+
2782+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
2783+
result = DataFrame(ser, columns=["a"])
2784+
assert result.dtypes.iloc[0] != np.object_
2785+
result = DataFrame({"a": ser})
2786+
assert result.dtypes.iloc[0] == np.object_
2787+
27712788

27722789
class TestDataFrameConstructorIndexInference:
27732790
def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):

pandas/tests/indexes/base_class/test_constructors.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pandas import (
66
Index,
77
MultiIndex,
8+
Series,
89
)
910
import pandas._testing as tm
1011

@@ -57,3 +58,16 @@ def test_index_string_inference(self):
5758
with pd.option_context("future.infer_string", True):
5859
ser = Index(["a", 1])
5960
tm.assert_index_equal(ser, expected)
61+
62+
def test_inference_on_pandas_objects(self):
63+
# GH#56012
64+
idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
65+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
66+
result = Index(idx)
67+
assert result.dtype != np.object_
68+
69+
ser = Series([pd.Timestamp("2019-12-31")], dtype=object)
70+
71+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
72+
result = Index(ser)
73+
assert result.dtype != np.object_

pandas/tests/indexes/test_base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ def test_constructor_copy(self, index, using_infer_string):
104104
)
105105
def test_constructor_from_index_dtlike(self, cast_as_obj, index):
106106
if cast_as_obj:
107-
result = Index(index.astype(object))
107+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
108+
result = Index(index.astype(object))
108109
else:
109110
result = Index(index)
110111

0 commit comments

Comments
 (0)