diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index e9568a7f15d97..a114e5346c069 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -656,8 +656,10 @@ Other Deprecations - Deprecated ``numeric_only=None`` in :meth:`DataFrame.rank`; in a future version ``numeric_only`` must be either ``True`` or ``False`` (the default) (:issue:`45036`) - Deprecated the behavior of :meth:`Timestamp.utcfromtimestamp`, in the future it will return a timezone-aware UTC :class:`Timestamp` (:issue:`22451`) - Deprecated :meth:`NaT.freq` (:issue:`45071`) +- Deprecated behavior of :class:`Series` and :class:`DataFrame` construction when passed float-dtype data containing ``NaN`` and an integer dtype ignoring the dtype argument; in a future version this will raise (:issue:`40110`) - + .. --------------------------------------------------------------------------- .. _whatsnew_140.performance: diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 17fa2d6e2f388..e496125683c09 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -535,6 +535,15 @@ def sanitize_array( try: subarr = _try_cast(data, dtype, copy, True) except IntCastingNaNError: + warnings.warn( + "In a future version, passing float-dtype values containing NaN " + "and an integer dtype will raise IntCastingNaNError " + "(subclass of ValueError) instead of silently ignoring the " + "passed dtype. To retain the old behavior, call Series(arr) or " + "DataFrame(arr) without passing a dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) subarr = np.array(data, copy=copy) except ValueError: if not raise_cast_failure: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 7cf2721621a03..7f030fc11a20b 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -97,12 +97,14 @@ def test_constructor_dict_with_tzaware_scalar(self): def test_construct_ndarray_with_nas_and_int_dtype(self): # GH#26919 match Series by not casting np.nan to meaningless int arr = np.array([[1, np.nan], [2, 3]]) - df = DataFrame(arr, dtype="i8") + with tm.assert_produces_warning(FutureWarning): + df = DataFrame(arr, dtype="i8") assert df.values.dtype == arr.dtype assert isna(df.iloc[0, 1]) # check this matches Series behavior - ser = Series(arr[0], dtype="i8", name=0) + with tm.assert_produces_warning(FutureWarning): + ser = Series(arr[0], dtype="i8", name=0) expected = df.iloc[0] tm.assert_series_equal(ser, expected) @@ -937,7 +939,11 @@ def _check_basic_constructor(self, empty): assert len(frame.index) == 3 assert len(frame.columns) == 1 - frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64) + warn = None if empty is np.ones else FutureWarning + with tm.assert_produces_warning(warn): + frame = DataFrame( + mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64 + ) if empty is np.ones: # passing dtype casts assert frame.values.dtype == np.int64 @@ -1766,7 +1772,9 @@ def test_constructor_mix_series_nonseries(self, float_frame): DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]}) def test_constructor_miscast_na_int_dtype(self): - df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64) + msg = "float-dtype values containing NaN and an integer dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64) expected = DataFrame([[np.nan, 1], [1, 0]]) tm.assert_frame_equal(df, expected) @@ -2713,10 +2721,19 @@ def test_floating_values_integer_dtype(self): # if they can be cast losslessly, no warning DataFrame(arr.round(), dtype="i8") - # with NaNs, we already have the correct behavior, so no warning + # with NaNs, we go through a different path with a different warning arr[0, 0] = np.nan - with tm.assert_produces_warning(None): + msg = "passing float-dtype values containing NaN" + with tm.assert_produces_warning(FutureWarning, match=msg): DataFrame(arr, dtype="i8") + with tm.assert_produces_warning(FutureWarning, match=msg): + Series(arr[0], dtype="i8") + # The future (raising) behavior matches what we would get via astype: + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + DataFrame(arr).astype("i8") + with pytest.raises(ValueError, match=msg): + Series(arr[0]).astype("i8") class TestDataFrameConstructorWithDatetimeTZ: diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index eecae31bec914..e73885ebcc2c8 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1046,8 +1046,8 @@ def _test_stack_with_multiindex(multiindex): names=[None, "Lower"], ), columns=Index(["B", "C"], name="Upper"), - dtype=df.dtypes[0], ) + expected["B"] = expected["B"].astype(df.dtypes[0]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("ordered", [False, True]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 00a958f58cc93..7300b8f03ade6 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -652,8 +652,10 @@ def test_constructor_sanitize(self): s = Series(np.array([1.0, 1.0, 8.0]), dtype="i8") assert s.dtype == np.dtype("i8") - s = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8") - assert s.dtype == np.dtype("f8") + msg = "float-dtype values containing NaN and an integer dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8") + assert ser.dtype == np.dtype("f8") def test_constructor_copy(self): # GH15125