diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index ab832c145a052..6ca43aebed89c 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -378,9 +378,6 @@ class NaTType(_NaT): def __reduce__(self): return (__nat_unpickle, (None, )) - def __rdiv__(self, other): - return _nat_rdivide_op(self, other) - def __rtruediv__(self, other): return _nat_rdivide_op(self, other) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a7252b6a7b7a2..f16ff32dee7bc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -63,7 +63,10 @@ needs_i8_conversion, ) from pandas.core.dtypes.concat import concat_compat -from pandas.core.dtypes.dtypes import PandasDtype +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + PandasDtype, +) from pandas.core.dtypes.generic import ( ABCDatetimeArray, ABCExtensionArray, @@ -492,7 +495,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]: elif needs_i8_conversion(values.dtype): return isin(comps, values.astype(object)) - elif is_extension_array_dtype(values.dtype): + elif isinstance(values.dtype, ExtensionDtype): return isin(np.asarray(comps), np.asarray(values)) # GH16012 @@ -511,19 +514,7 @@ def f(c, v): f = np.in1d else: - # error: List item 0 has incompatible type "Union[Any, dtype[Any], - # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, - # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, - # Any]]" - # error: List item 1 has incompatible type "Union[Any, ExtensionDtype]"; - # expected "Union[dtype[Any], None, type, _SupportsDType, str, Tuple[Any, - # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]" - # error: List item 1 has incompatible type "Union[dtype[Any], ExtensionDtype]"; - # expected "Union[dtype[Any], None, type, _SupportsDType, str, Tuple[Any, - # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]" - common = np.find_common_type( - [values.dtype, comps.dtype], [] # type: ignore[list-item] - ) + common = np.find_common_type([values.dtype, comps.dtype], []) values = values.astype(common, copy=False) comps = comps.astype(common, copy=False) f = htable.ismember diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 4c868747fa930..e1f80c5894bb1 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -127,7 +127,7 @@ def coerce_to_array( return values, mask values = np.array(values, copy=copy) - if is_object_dtype(values): + if is_object_dtype(values.dtype): inferred_type = lib.infer_dtype(values, skipna=True) if inferred_type == "empty": pass diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 3587575503d33..443dfa4122389 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -177,7 +177,7 @@ def coerce_to_array( values = np.array(values, copy=copy) inferred_type = None - if is_object_dtype(values) or is_string_dtype(values): + if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): inferred_type = lib.infer_dtype(values, skipna=True) if inferred_type == "empty": pass diff --git a/pandas/core/base.py b/pandas/core/base.py index 285afc05f905c..ef3a60f46283a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -55,6 +55,7 @@ from pandas.core import ( algorithms, + nanops, ops, ) from pandas.core.accessor import DirNamesMixin @@ -70,7 +71,6 @@ ensure_wrapped_if_datetimelike, extract_array, ) -import pandas.core.nanops as nanops if TYPE_CHECKING: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b70ea9f816aef..4f4eac828fd60 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -107,6 +107,8 @@ _int32_max = np.iinfo(np.int32).max _int64_max = np.iinfo(np.int64).max +_dtype_obj = np.dtype(object) + NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray) @@ -123,7 +125,7 @@ def maybe_convert_platform( # or ExtensionArray here. arr = values - if arr.dtype == object: + if arr.dtype == _dtype_obj: arr = cast(np.ndarray, arr) arr = lib.maybe_convert_objects(arr) @@ -159,7 +161,7 @@ def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar: ------- scalar """ - if dtype == object: + if dtype == _dtype_obj: pass elif isinstance(value, (np.datetime64, datetime)): value = Timestamp(value) @@ -662,9 +664,7 @@ def _ensure_dtype_type(value, dtype: np.dtype): """ # Start with exceptions in which we do _not_ cast to numpy types - # error: Non-overlapping equality check (left operand type: "dtype[Any]", right - # operand type: "Type[object_]") - if dtype == np.object_: # type: ignore[comparison-overlap] + if dtype == _dtype_obj: return value # Note: before we get here we have already excluded isna(value) @@ -1111,10 +1111,7 @@ def astype_nansafe( raise ValueError("dtype must be np.dtype or ExtensionDtype") if arr.dtype.kind in ["m", "M"] and ( - issubclass(dtype.type, str) - # error: Non-overlapping equality check (left operand type: "dtype[Any]", right - # operand type: "Type[object]") - or dtype == object # type: ignore[comparison-overlap] + issubclass(dtype.type, str) or dtype == _dtype_obj ): from pandas.core.construction import ensure_wrapped_if_datetimelike @@ -1124,7 +1121,7 @@ def astype_nansafe( if issubclass(dtype.type, str): return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False) - elif is_datetime64_dtype(arr): + elif is_datetime64_dtype(arr.dtype): # Non-overlapping equality check (left operand type: "dtype[Any]", right # operand type: "Type[signedinteger[Any]]") if dtype == np.int64: # type: ignore[comparison-overlap] @@ -1146,7 +1143,7 @@ def astype_nansafe( raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]") - elif is_timedelta64_dtype(arr): + elif is_timedelta64_dtype(arr.dtype): # error: Non-overlapping equality check (left operand type: "dtype[Any]", right # operand type: "Type[signedinteger[Any]]") if dtype == np.int64: # type: ignore[comparison-overlap] @@ -1170,7 +1167,7 @@ def astype_nansafe( elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer): return astype_float_to_int_nansafe(arr, dtype, copy) - elif is_object_dtype(arr): + elif is_object_dtype(arr.dtype): # work around NumPy brokenness, #1987 if np.issubdtype(dtype.type, np.integer): @@ -1718,7 +1715,7 @@ def maybe_cast_to_datetime( # and no coercion specified value = sanitize_to_nanoseconds(value) - elif value.dtype == object: + elif value.dtype == _dtype_obj: value = maybe_infer_to_datetimelike(value) elif isinstance(value, list): @@ -1862,9 +1859,7 @@ def construct_2d_arraylike_from_scalar( if dtype.kind in ["m", "M"]: value = maybe_unbox_datetimelike_tz_deprecation(value, dtype) - # error: Non-overlapping equality check (left operand type: "dtype[Any]", right - # operand type: "Type[object]") - elif dtype == object: # type: ignore[comparison-overlap] + elif dtype == _dtype_obj: if isinstance(value, (np.timedelta64, np.datetime64)): # calling np.array below would cast to pytimedelta/pydatetime out = np.empty(shape, dtype=object) @@ -2190,9 +2185,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: # ExtensionBlock._can_hold_element return True - # error: Non-overlapping equality check (left operand type: "dtype[Any]", right - # operand type: "Type[object]") - if dtype == object: # type: ignore[comparison-overlap] + if dtype == _dtype_obj: return True tipo = maybe_infer_dtype_type(element) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 257916630e457..922a5e5758979 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7997,13 +7997,13 @@ def pivot(self, index=None, columns=None, values=None) -> DataFrame: ... aggfunc={'D': np.mean, ... 'E': [min, max, np.mean]}) >>> table - D E - mean max mean min + D E + mean max mean min A C - bar large 5.500000 9.0 7.500000 6.0 - small 5.500000 9.0 8.500000 8.0 - foo large 2.000000 5.0 4.500000 4.0 - small 2.333333 6.0 4.333333 2.0 + bar large 5.500000 9 7.500000 6 + small 5.500000 9 8.500000 8 + foo large 2.000000 5 4.500000 4 + small 2.333333 6 4.333333 2 """ @Substitution("") diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 74044e55b5de6..d5b1292435f04 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -443,9 +443,8 @@ def __new__( return Index._simple_new(data, name=name) elif is_ea_or_datetimelike_dtype(data_dtype): - # Argument 1 to "_dtype_to_subclass" of "Index" has incompatible type - # "Optional[Any]"; expected "Union[dtype[Any], ExtensionDtype]" [arg-type] - klass = cls._dtype_to_subclass(data_dtype) # type: ignore[arg-type] + data_dtype = cast(DtypeObj, data_dtype) + klass = cls._dtype_to_subclass(data_dtype) if klass is not Index: result = klass(data, copy=copy, name=name, **kwargs) if dtype is not None: @@ -6245,7 +6244,7 @@ def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default): # wish to have special treatment for floats/ints, e.g. Float64Index and # datetimelike Indexes # reject them, if index does not contain label - if (is_float(label) or is_integer(label)) and label not in self._values: + if (is_float(label) or is_integer(label)) and label not in self: raise self._invalid_indexer("slice", label) return label diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9558b82d95fde..6ef8d90d7dcf2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -762,7 +762,7 @@ def replace_list( src_len = len(pairs) - 1 - if is_string_dtype(values): + if is_string_dtype(values.dtype): # Calculate the mask once, prior to the call of comp # in order to avoid repeating the same computations mask = ~isna(values) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index d40f8c69e1b7c..fc6db78320169 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -60,11 +60,11 @@ def pivot_table( columns=None, aggfunc: AggFuncType = "mean", fill_value=None, - margins=False, - dropna=True, - margins_name="All", - observed=False, - sort=True, + margins: bool = False, + dropna: bool = True, + margins_name: str = "All", + observed: bool = False, + sort: bool = True, ) -> DataFrame: index = _convert_by(index) columns = _convert_by(columns) @@ -178,13 +178,12 @@ def __internal_pivot_table( and v in agged and not is_integer_dtype(agged[v]) ): - if isinstance(agged[v], ABCDataFrame): + if not isinstance(agged[v], ABCDataFrame): # exclude DataFrame case bc maybe_downcast_to_dtype expects # ArrayLike - # TODO: why does test_pivot_table_doctest_case fail if - # we don't do this apparently-unnecessary setitem? - agged[v] = agged[v] - else: + # e.g. test_pivot_table_multiindex_columns_doctest_case + # agged.columns is a MultiIndex and 'v' is indexing only + # on its first level. agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype) table = agged @@ -253,7 +252,7 @@ def __internal_pivot_table( def _add_margins( table: DataFrame | Series, - data, + data: DataFrame, values, rows, cols, @@ -331,7 +330,7 @@ def _add_margins( return result -def _compute_grand_margin(data, values, aggfunc, margins_name: str = "All"): +def _compute_grand_margin(data: DataFrame, values, aggfunc, margins_name: str = "All"): if values: grand_margin = {} @@ -522,7 +521,7 @@ def crosstab( rownames=None, colnames=None, aggfunc=None, - margins=False, + margins: bool = False, margins_name: str = "All", dropna: bool = True, normalize=False, @@ -682,7 +681,9 @@ def crosstab( return table -def _normalize(table, normalize, margins: bool, margins_name="All"): +def _normalize( + table: DataFrame, normalize, margins: bool, margins_name="All" +) -> DataFrame: if not isinstance(normalize, (bool, str)): axis_subs = {0: "index", 1: "columns"} diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c2cd73584b7da..01f0efea15b89 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -679,7 +679,7 @@ def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex: def _stack_multi_columns(frame, level_num=-1, dropna=True): - def _convert_level_number(level_num, columns): + def _convert_level_number(level_num: int, columns): """ Logic for converting the level number to something we can safely pass to swaplevel. diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 88607f4b036a0..035e886f1906e 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2077,11 +2077,12 @@ def agg(arr): with pytest.raises(KeyError, match="notpresent"): foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) - def test_pivot_table_doctest_case(self): - # TODO: better name. the relevant characteristic is that - # the call to maybe_downcast_to_dtype(agged[v], data[v].dtype) in + def test_pivot_table_multiindex_columns_doctest_case(self): + # The relevant characteristic is that the call + # to maybe_downcast_to_dtype(agged[v], data[v].dtype) in # __internal_pivot_table has `agged[v]` a DataFrame instead of Series, - # i.e agged.columns is not unique + # In this case this is because agged.columns is a MultiIndex and 'v' + # is only indexing on its first level. df = DataFrame( { "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], @@ -2124,6 +2125,8 @@ def test_pivot_table_doctest_case(self): ] ) expected = DataFrame(vals, columns=cols, index=index) + expected[("E", "min")] = expected[("E", "min")].astype(np.int64) + expected[("E", "max")] = expected[("E", "max")].astype(np.int64) tm.assert_frame_equal(table, expected) def test_pivot_table_sort_false(self):