From 05dd43fadff7dd14bab89ed537f1129cee63deef Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 1 Apr 2023 12:12:46 -0700 Subject: [PATCH 1/2] PERF: extract_array --- pandas/core/base.py | 9 ++------- pandas/core/construction.py | 40 ++++++++++++++++++++++++++----------- pandas/core/nanops.py | 40 +++++++++++++------------------------ pandas/tests/test_nanops.py | 4 ++-- 4 files changed, 46 insertions(+), 47 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 0a46b8d9c6e3a..e34c0a25a1858 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -907,17 +907,12 @@ def _map_values(self, mapper, na_action=None, convert: bool = True): If the function returns a tuple with more than one element a MultiIndex will be returned. """ - arr = extract_array(self, extract_numpy=True, extract_range=True) + arr = self._values if isinstance(arr, ExtensionArray): return arr.map(mapper, na_action=na_action) - # Argument 1 to "map_array" has incompatible type - # "Union[IndexOpsMixin, ndarray[Any, Any]]"; - # expected "Union[ExtensionArray, ndarray[Any, Any]] - return algorithms.map_array( - arr, mapper, na_action=na_action, convert=convert # type: ignore[arg-type] - ) + return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert) @final def value_counts( diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 8c5f291742b9b..bc71804221ddf 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -55,8 +55,6 @@ ABCDataFrame, ABCExtensionArray, ABCIndex, - ABCPandasArray, - ABCRangeIndex, ABCSeries, ) from pandas.core.dtypes.missing import isna @@ -379,6 +377,21 @@ def array( return PandasArray._from_sequence(data, dtype=dtype, copy=copy) +_typs = frozenset( + { + "index", + "rangeindex", + "multiindex", + "datetimeindex", + "timedeltaindex", + "periodindex", + "categoricalindex", + "intervalindex", + "series", + } +) + + @overload def extract_array( obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ... @@ -438,19 +451,22 @@ def extract_array( >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True) array([1, 2, 3]) """ - if isinstance(obj, (ABCIndex, ABCSeries)): - if isinstance(obj, ABCRangeIndex): + typ = getattr(obj, "_typ", None) + if typ in _typs: + # i.e. isinstance(obj, (ABCIndex, ABCSeries)) + if typ == "rangeindex": if extract_range: - return obj._values - # https://github.com/python/mypy/issues/1081 - # error: Incompatible return value type (got "RangeIndex", expected - # "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]") - return obj # type: ignore[return-value] + # error: "T" has no attribute "_values" + return obj._values # type: ignore[attr-defined] + return obj - return obj._values + # error: "T" has no attribute "_values" + return obj._values # type: ignore[attr-defined] - elif extract_numpy and isinstance(obj, ABCPandasArray): - return obj.to_numpy() + elif extract_numpy and typ == "npy_extension": + # i.e. isinstance(obj, ABCPandasArray) + # error: "T" has no attribute "to_numpy" + return obj.to_numpy() # type: ignore[attr-defined] return obj diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 43e44c7882cca..137fe67630968 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -57,8 +57,6 @@ notna, ) -from pandas.core.construction import extract_array - bn = import_optional_dependency("bottleneck", errors="warn") _BOTTLENECK_INSTALLED = bn is not None _USE_BOTTLENECK = False @@ -308,9 +306,6 @@ def _get_values( # with scalar fill_value. This guarantee is important for the # np.where call below assert is_scalar(fill_value) - # error: Incompatible types in assignment (expression has type "Union[Any, - # Union[ExtensionArray, ndarray]]", variable has type "ndarray") - values = extract_array(values, extract_numpy=True) # type: ignore[assignment] mask = _maybe_get_mask(values, skipna, mask) @@ -522,12 +517,12 @@ def nanany( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, 2]) - >>> nanops.nanany(s) + >>> nanops.nanany(s.values) True >>> from pandas.core import nanops >>> s = pd.Series([np.nan]) - >>> nanops.nanany(s) + >>> nanops.nanany(s.values) False """ if needs_i8_conversion(values.dtype) and values.dtype.kind != "m": @@ -577,12 +572,12 @@ def nanall( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, 2, np.nan]) - >>> nanops.nanall(s) + >>> nanops.nanall(s.values) True >>> from pandas.core import nanops >>> s = pd.Series([1, 0]) - >>> nanops.nanall(s) + >>> nanops.nanall(s.values) False """ if needs_i8_conversion(values.dtype) and values.dtype.kind != "m": @@ -637,7 +632,7 @@ def nansum( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, 2, np.nan]) - >>> nanops.nansum(s) + >>> nanops.nansum(s.values) 3.0 """ values, mask, dtype, dtype_max, _ = _get_values( @@ -705,7 +700,7 @@ def nanmean( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, 2, np.nan]) - >>> nanops.nanmean(s) + >>> nanops.nanmean(s.values) 1.5 """ values, mask, dtype, dtype_max, _ = _get_values( @@ -761,7 +756,7 @@ def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask= -------- >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 2, 2]) - >>> nanops.nanmedian(s) + >>> nanops.nanmedian(s.values) 2.0 """ @@ -928,7 +923,7 @@ def nanstd( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 2, 3]) - >>> nanops.nanstd(s) + >>> nanops.nanstd(s.values) 1.0 """ if values.dtype == "M8[ns]": @@ -944,7 +939,7 @@ def nanstd( @disallow("M8", "m8") @bottleneck_switch(ddof=1) def nanvar( - values, + values: np.ndarray, *, axis: AxisInt | None = None, skipna: bool = True, @@ -975,10 +970,9 @@ def nanvar( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 2, 3]) - >>> nanops.nanvar(s) + >>> nanops.nanvar(s.values) 1.0 """ - values = extract_array(values, extract_numpy=True) dtype = values.dtype mask = _maybe_get_mask(values, skipna, mask) if is_any_int_dtype(dtype): @@ -1050,7 +1044,7 @@ def nansem( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 2, 3]) - >>> nanops.nansem(s) + >>> nanops.nansem(s.values) 0.5773502691896258 """ # This checks if non-numeric-like data is passed with numeric_only=False @@ -1229,12 +1223,9 @@ def nanskew( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 1, 2]) - >>> nanops.nanskew(s) + >>> nanops.nanskew(s.values) 1.7320508075688787 """ - # error: Incompatible types in assignment (expression has type "Union[Any, - # Union[ExtensionArray, ndarray]]", variable has type "ndarray") - values = extract_array(values, extract_numpy=True) # type: ignore[assignment] mask = _maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") @@ -1319,12 +1310,9 @@ def nankurt( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 1, 3, 2]) - >>> nanops.nankurt(s) + >>> nanops.nankurt(s.values) -1.2892561983471076 """ - # error: Incompatible types in assignment (expression has type "Union[Any, - # Union[ExtensionArray, ndarray]]", variable has type "ndarray") - values = extract_array(values, extract_numpy=True) # type: ignore[assignment] mask = _maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") @@ -1413,7 +1401,7 @@ def nanprod( -------- >>> from pandas.core import nanops >>> s = pd.Series([1, 2, 3, np.nan]) - >>> nanops.nanprod(s) + >>> nanops.nanprod(s.values) 6.0 """ mask = _maybe_get_mask(values, skipna, mask) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index ba21ea4e7db95..05db055246a5d 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1238,8 +1238,8 @@ def test_nanops_independent_of_mask_param(operation): # GH22764 ser = Series([1, 2, np.nan, 3, np.nan, 4]) mask = ser.isna() - median_expected = operation(ser) - median_result = operation(ser, mask=mask) + median_expected = operation(ser._values) + median_result = operation(ser._values, mask=mask) assert median_expected == median_result From 6e35cbea347655dc593f06679b2c0ce2d0ffc1ac Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 1 Apr 2023 18:54:27 -0700 Subject: [PATCH 2/2] update test --- pandas/tests/groupby/test_function.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 512f17b89df2f..5f6f99370080f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -17,7 +17,6 @@ date_range, ) import pandas._testing as tm -from pandas.core import nanops from pandas.tests.groupby import get_groupby_method_args from pandas.util import _test_decorators as td @@ -365,7 +364,7 @@ def test_cython_median(): labels[::17] = np.nan result = df.groupby(labels).median() - exp = df.groupby(labels).agg(nanops.nanmedian) + exp = df.groupby(labels).agg(np.nanmedian) tm.assert_frame_equal(result, exp) df = DataFrame(np.random.randn(1000, 5))