From 69eee225e91de19187ceb33c22e3e5e4bb655bdd Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Dec 2020 17:15:09 -0800 Subject: [PATCH 1/2] REF: remove maybe_upcast_putmask --- pandas/core/dtypes/cast.py | 81 ------------------------- pandas/core/ops/array_ops.py | 9 ++- pandas/tests/dtypes/cast/test_upcast.py | 71 ---------------------- 3 files changed, 7 insertions(+), 154 deletions(-) delete mode 100644 pandas/tests/dtypes/cast/test_upcast.py diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 63445d0e1598d..80de6a648e775 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -419,87 +419,6 @@ def maybe_cast_to_extension_array( return result -def maybe_upcast_putmask( - result: np.ndarray, mask: np.ndarray, other: Scalar -) -> Tuple[np.ndarray, bool]: - """ - A safe version of putmask that potentially upcasts the result. - - The result is replaced with the first N elements of other, - where N is the number of True values in mask. - If the length of other is shorter than N, other will be repeated. - - Parameters - ---------- - result : ndarray - The destination array. This will be mutated in-place if no upcasting is - necessary. - mask : boolean ndarray - other : scalar - The source value. - - Returns - ------- - result : ndarray - changed : bool - Set to true if the result array was upcasted. - - Examples - -------- - >>> arr = np.arange(1, 6) - >>> mask = np.array([False, True, False, True, True]) - >>> result, _ = maybe_upcast_putmask(arr, mask, False) - >>> result - array([1, 0, 3, 0, 0]) - """ - if not isinstance(result, np.ndarray): - raise ValueError("The result input must be a ndarray.") - if not is_scalar(other): - # We _could_ support non-scalar other, but until we have a compelling - # use case, we assume away the possibility. 
- raise ValueError("other must be a scalar") - - if mask.any(): - # Two conversions for date-like dtypes that can't be done automatically - # in np.place: - # NaN -> NaT - # integer or integer array -> date-like array - if result.dtype.kind in ["m", "M"]: - if isna(other): - other = result.dtype.type("nat") - elif is_integer(other): - other = np.array(other, dtype=result.dtype) - - def changeit(): - # we are forced to change the dtype of the result as the input - # isn't compatible - r, _ = maybe_upcast(result, fill_value=other, copy=True) - np.place(r, mask, other) - - return r, True - - # we want to decide whether place will work - # if we have nans in the False portion of our mask then we need to - # upcast (possibly), otherwise we DON't want to upcast (e.g. if we - # have values, say integers, in the success portion then it's ok to not - # upcast) - new_dtype, _ = maybe_promote(result.dtype, other) - if new_dtype != result.dtype: - - # we have a scalar or len 0 ndarray - # and its nan and we are changing some values - if isna(other): - return changeit() - - try: - np.place(result, mask, other) - except TypeError: - # e.g. int-dtype result and float-dtype other - return changeit() - - return result, False - - def maybe_promote(dtype, fill_value=np.nan): """ Find the minimal dtype that can hold both the given dtype and fill_value. 
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 22f674cc6a894..10976df23c5ae 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -16,7 +16,6 @@ from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, find_common_type, - maybe_upcast_putmask, ) from pandas.core.dtypes.common import ( ensure_object, @@ -110,7 +109,13 @@ def _masked_arith_op(x: np.ndarray, y, op): with np.errstate(all="ignore"): result[mask] = op(xrav[mask], y) - result, _ = maybe_upcast_putmask(result, ~mask, np.nan) + if not mask.all(): + try: + np.putmask(result, ~mask, np.nan) + except ValueError: + # e.g. result is int, need to cast + result = np.where(~mask, result, np.nan) + result = result.reshape(x.shape) # 2D compat return result diff --git a/pandas/tests/dtypes/cast/test_upcast.py b/pandas/tests/dtypes/cast/test_upcast.py deleted file mode 100644 index f9227a4e78a79..0000000000000 --- a/pandas/tests/dtypes/cast/test_upcast.py +++ /dev/null @@ -1,71 +0,0 @@ -import numpy as np -import pytest - -from pandas.core.dtypes.cast import maybe_upcast_putmask - -from pandas import Series -import pandas._testing as tm - - -@pytest.mark.parametrize("result", [Series([10, 11, 12]), [10, 11, 12], (10, 11, 12)]) -def test_upcast_error(result): - # GH23823 require result arg to be ndarray - mask = np.array([False, True, False]) - other = np.array([61, 62, 63]) - with pytest.raises(ValueError, match="The result input must be a ndarray"): - result, _ = maybe_upcast_putmask(result, mask, other) - - -@pytest.mark.parametrize( - "arr, other", - [ - (np.arange(1, 6), np.array([61, 62, 63])), - (np.arange(1, 6), np.array([61.1, 62.2, 63.3])), - (np.arange(10, 15), np.array([61, 62])), - (np.arange(10, 15), np.array([61, np.nan])), - ( - np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"), - np.arange("2018-01-01", "2018-01-04", dtype="datetime64[D]"), - ), - ( - np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"), 
- np.arange("2018-01-01", "2018-01-03", dtype="datetime64[D]"), - ), - ], -) -def test_upcast_scalar_other(arr, other): - # for now we do not support non-scalar `other` - mask = np.array([False, True, False, True, True]) - with pytest.raises(ValueError, match="other must be a scalar"): - maybe_upcast_putmask(arr, mask, other) - - -def test_upcast(): - # GH23823 - arr = np.arange(1, 6) - mask = np.array([False, True, False, True, True]) - result, changed = maybe_upcast_putmask(arr, mask, other=np.nan) - - expected = np.array([1, np.nan, 3, np.nan, np.nan]) - assert changed - tm.assert_numpy_array_equal(result, expected) - - -def test_upcast_datetime(): - # GH23823 - arr = np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]") - mask = np.array([False, True, False, True, True]) - result, changed = maybe_upcast_putmask(arr, mask, other=np.nan) - - expected = np.array( - [ - "2019-01-01", - np.datetime64("NaT"), - "2019-01-03", - np.datetime64("NaT"), - np.datetime64("NaT"), - ], - dtype="datetime64[D]", - ) - assert not changed - tm.assert_numpy_array_equal(result, expected) From c74e3e486e8c1365d8bc8be3cd222fcd3220317f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Dec 2020 17:46:39 -0800 Subject: [PATCH 2/2] REF: simplify maybe_upcast_putmask --- pandas/core/dtypes/cast.py | 48 +++++++++++++++++++++++++ pandas/core/ops/array_ops.py | 9 ++--- pandas/tests/dtypes/cast/test_upcast.py | 37 +++++++++++++++++++ 3 files changed, 87 insertions(+), 7 deletions(-) create mode 100644 pandas/tests/dtypes/cast/test_upcast.py diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 80de6a648e775..859783ace5006 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -419,6 +419,54 @@ def maybe_cast_to_extension_array( return result +def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray) -> np.ndarray: + """ + A safe version of putmask that potentially upcasts the result. 
+ + Entries of result where mask is True are set to np.nan. + If the dtype of result cannot hold np.nan, result is first copied and + upcast to the dtype given by maybe_promote(result.dtype, np.nan). + + Parameters + ---------- + result : ndarray + The destination array. This will be mutated in-place if no upcasting is + necessary. + mask : boolean ndarray + + Returns + ------- + result : ndarray + + Examples + -------- + >>> arr = np.arange(1, 6) + >>> mask = np.array([False, True, False, True, True]) + >>> result = maybe_upcast_putmask(arr, mask) + >>> result + array([ 1., nan, 3., nan, nan]) + """ + if not isinstance(result, np.ndarray): + raise ValueError("The result input must be a ndarray.") + + # NB: we never get here with result.dtype.kind in ["m", "M"] + + if mask.any(): + + # we want to decide whether place will work + # if we have nans in the False portion of our mask then we need to + # upcast (possibly), otherwise we DON't want to upcast (e.g. if we + # have values, say integers, in the success portion then it's ok to not + # upcast) + new_dtype, _ = maybe_promote(result.dtype, np.nan) + if new_dtype != result.dtype: + result = result.astype(new_dtype, copy=True) + + np.place(result, mask, np.nan) + + return result + + def maybe_promote(dtype, fill_value=np.nan): """ Find the minimal dtype that can hold both the given dtype and fill_value. diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 10976df23c5ae..857840cf9d8b9 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -16,6 +16,7 @@ from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, find_common_type, + maybe_upcast_putmask, ) from pandas.core.dtypes.common import ( ensure_object, @@ -109,13 +110,7 @@ def _masked_arith_op(x: np.ndarray, y, op): with np.errstate(all="ignore"): result[mask] = op(xrav[mask], y) - if not mask.all(): - try: - np.putmask(result, ~mask, np.nan) - except ValueError: - # e.g. 
result is int, need to cast - result = np.where(~mask, result, np.nan) - + result = maybe_upcast_putmask(result, ~mask) result = result.reshape(x.shape) # 2D compat return result diff --git a/pandas/tests/dtypes/cast/test_upcast.py b/pandas/tests/dtypes/cast/test_upcast.py new file mode 100644 index 0000000000000..89b59ebe6628f --- /dev/null +++ b/pandas/tests/dtypes/cast/test_upcast.py @@ -0,0 +1,37 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_upcast_putmask + +from pandas import Series +import pandas._testing as tm + + +@pytest.mark.parametrize("result", [Series([10, 11, 12]), [10, 11, 12], (10, 11, 12)]) +def test_upcast_error(result): + # GH23823 require result arg to be ndarray + mask = np.array([False, True, False]) + with pytest.raises(ValueError, match="The result input must be a ndarray"): + result = maybe_upcast_putmask(result, mask) + + +def test_upcast(): + # GH23823 + arr = np.arange(1, 6) + mask = np.array([False, True, False, True, True]) + result = maybe_upcast_putmask(arr, mask) + + expected = np.array([1, np.nan, 3, np.nan, np.nan]) + tm.assert_numpy_array_equal(result, expected) + + +def test_maybe_upcast_putmask_bool(): + # a case where maybe_upcast_putmask is *not* equivalent to + # try: np.putmask(result, mask, np.nan) + # except (ValueError, TypeError): result = np.where(mask, result, np.nan) + arr = np.array([True, False, True, False, True], dtype=bool) + mask = np.array([False, True, False, True, True]) + result = maybe_upcast_putmask(arr, mask) + + expected = np.array([True, np.nan, True, np.nan, np.nan], dtype=object) + tm.assert_numpy_array_equal(result, expected)