diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index d86c60d78195b..91ba6e9ea7a00 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -222,10 +222,8 @@ def _concat_same_type( raise ValueError("to_concat must have the same dtype (tz)", dtypes) new_values = [x._ndarray for x in to_concat] - new_values = np.concatenate(new_values, axis=axis) - # error: Argument 1 to "_from_backing_data" of "NDArrayBackedExtensionArray" has - # incompatible type "List[ndarray]"; expected "ndarray" - return to_concat[0]._from_backing_data(new_values) # type: ignore[arg-type] + new_arr = np.concatenate(new_values, axis=axis) + return to_concat[0]._from_backing_data(new_arr) @doc(ExtensionArray.searchsorted) def searchsorted( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 93d32217ef322..458960225f206 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1095,7 +1095,7 @@ def _sub_datetimelike_scalar(self, other): _sub_datetime_arraylike = _sub_datetimelike_scalar - def _sub_period(self, other): + def _sub_period(self, other: Period): # Overridden by PeriodArray raise TypeError(f"cannot subtract Period from a {type(self).__name__}") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 0bac4fe59d51c..2368af0de1bf3 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -707,11 +707,7 @@ def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray: # ------------------------------------------------------------------ # Arithmetic Methods - def _sub_datelike(self, other): - assert other is not NaT - return NotImplemented - - def _sub_period(self, other): + def _sub_period(self, other: Period): # If the operation is well-defined, we return an object-Index # of DateOffsets. Null entries are filled with pd.NaT self._check_compatible_with(other) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 25c4a15127200..15ec1006b4f80 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -838,10 +838,10 @@ def _first_fill_value_loc(self): return np.searchsorted(diff, 2) + 1 def unique(self: SparseArrayT) -> SparseArrayT: - uniques = list(algos.unique(self.sp_values)) + uniques = algos.unique(self.sp_values) fill_loc = self._first_fill_value_loc() if fill_loc >= 0: - uniques.insert(fill_loc, self.fill_value) + uniques = np.insert(uniques, fill_loc, self.fill_value) return type(self)._from_sequence(uniques, dtype=self.dtype) def _values_for_factorize(self): @@ -1351,8 +1351,6 @@ def to_dense(self) -> np.ndarray: """ return np.asarray(self, dtype=self.sp_values.dtype) - _internal_get_values = to_dense - def _where(self, mask, value): # NB: may not preserve dtype, e.g. result may be Sparse[float64] # while self is Sparse[int64] diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 8c1644eda5234..b6bb5faeebdee 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -354,7 +354,9 @@ def update_dtype(self, dtype) -> SparseDtype: if not isinstance(dtype, np.dtype): raise TypeError("sparse arrays of extension dtypes not supported") - fill_value = astype_nansafe(np.array(self.fill_value), dtype).item() + fvarr = astype_nansafe(np.array(self.fill_value), dtype) + # NB: not fv_0d.item(), as that casts dt64->int + fill_value = fvarr[0] dtype = cls(dtype, fill_value=fill_value) return dtype diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 929dfa5c12078..cba055d5b4345 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -379,6 +379,13 @@ def trans(x): if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol): return new_result + elif dtype.kind == result.dtype.kind == "c": + new_result = result.astype(dtype) + + if array_equivalent(new_result, result): + # TODO: use tolerance like we do for float? + return new_result + return result diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 50a56761eda8c..d728cf4c8a9a2 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -127,10 +127,6 @@ def test_dense_repr(self, vals, fill_value): res = arr.to_dense() tm.assert_numpy_array_equal(res, vals) - res2 = arr._internal_get_values() - - tm.assert_numpy_array_equal(res2, vals) - @pytest.mark.parametrize("fix", ["arr", "zarr"]) def test_pickle(self, fix, request): obj = request.getfixturevalue(fix) diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py index 88efd0f4ea09f..6761040d444a5 100644 --- a/pandas/tests/arrays/sparse/test_astype.py +++ b/pandas/tests/arrays/sparse/test_astype.py @@ -85,7 +85,6 @@ def test_astype_all(self, any_real_numpy_dtype): np.array([0, 1], dtype="datetime64[ns]"), dtype=SparseDtype("datetime64[ns]", Timestamp("1970")), ), - marks=[pytest.mark.xfail(reason="NumPy-7619")], ), ( SparseArray([0, 1, 10]), diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 3fd50c08f5cc4..6379dfe2efefe 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -27,7 +27,7 @@ def test_value_counts_default_dropna(self, data): def test_value_counts(self, all_data, dropna): all_data = all_data[:10] if dropna: - other = np.array(all_data[~all_data.isna()]) + other = all_data[~all_data.isna()] else: other = all_data @@ -50,6 +50,10 @@ def test_value_counts_with_normalize(self, data): expected = pd.Series(0.0, index=result.index) expected[result > 0] = 1 / len(values) + if isinstance(data.dtype, pd.core.dtypes.dtypes.BaseMaskedDtype): + # TODO(GH#44692): avoid special-casing + expected = expected.astype("Float64") + self.assert_series_equal(result, expected) def test_count(self, data_missing): diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 87e526f868e3f..710e83c0c48a4 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -226,14 +226,6 @@ def test_searchsorted(self, data_for_sorting, as_series): sorter = np.array([1, 0]) assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 - @pytest.mark.xfail(reason="uses nullable integer") - def test_value_counts(self, all_data, dropna): - return super().test_value_counts(all_data, dropna) - - @pytest.mark.xfail(reason="uses nullable integer") - def test_value_counts_with_normalize(self, data): - super().test_value_counts_with_normalize(data) - def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting): # override because there are only 2 unique values diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index 4f8c46c8720f2..ff0ff7399e3e6 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -173,24 +173,7 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): - @pytest.mark.parametrize("dropna", [True, False]) - def test_value_counts(self, all_data, dropna): - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - else: - other = all_data - - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() - expected = expected.astype("Int64") - expected.index = expected.index.astype(all_data.dtype) - - self.assert_series_equal(result, expected) - - @pytest.mark.xfail(reason="uses nullable integer") - def test_value_counts_with_normalize(self, data): - super().test_value_counts_with_normalize(data) + pass class TestCasting(base.BaseCastingTests): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 8c8202e49ac3f..a6cf820dc7609 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -196,24 +196,7 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): - @pytest.mark.parametrize("dropna", [True, False]) - def test_value_counts(self, all_data, dropna): - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - else: - other = all_data - - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() - expected = expected.astype("Int64") - expected.index = expected.index.astype(all_data.dtype) - - self.assert_series_equal(result, expected) - - @pytest.mark.xfail(reason="uses nullable integer") - def test_value_counts_with_normalize(self, data): - super().test_value_counts_with_normalize(data) + pass class TestCasting(base.BaseCastingTests): diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 5415c99f5239a..2a660583f1396 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1463,6 +1463,7 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_null_group_str_transformer_series(request, dropna, transformation_func): # GH 17093 if transformation_func == "tshift": diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index 7075cfde2e828..b062dfb7a10ba 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -48,11 +48,6 @@ def test_mutability(index): def test_map_identity_mapping(index, request): # GH#12766 - if index.dtype == np.complex64: - mark = pytest.mark.xfail( - reason="maybe_downcast_to_dtype doesn't handle complex" - ) - request.node.add_marker(mark) result = index.map(lambda x: x) if index.dtype == object and result.dtype == bool: diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 71a1f926d64cb..6e979060ab21e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -540,11 +540,6 @@ def test_map_dictlike(self, index, mapper, request): elif not index.is_unique: # Cannot map duplicated index return - if index.dtype == np.complex64 and not isinstance(mapper(index, index), Series): - mark = pytest.mark.xfail( - reason="maybe_downcast_to_dtype doesn't handle complex" - ) - request.node.add_marker(mark) rng = np.arange(len(index), 0, -1) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index b8cee317af287..13af94feaf744 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -284,7 +284,7 @@ def test_multiply(self, values_for_np_reduce, box_with_array, request): obj = box(values) if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index: - mark = pytest.mark.xfail(reason="SparseArray has no 'mul'") + mark = pytest.mark.xfail(reason="SparseArray has no 'prod'") request.node.add_marker(mark) if values.dtype.kind in "iuf":