From 70a5b3476d31892d06a32167e451aae7bd3ce1d0 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 2 Oct 2023 13:58:08 -0700 Subject: [PATCH 1/4] CLN: assorted --- doc/redirects.csv | 1 - .../development/contributing_codebase.rst | 2 +- doc/source/reference/extensions.rst | 2 ++ pandas/core/indexes/base.py | 24 +++++++++-------- pandas/core/series.py | 3 ++- pandas/core/sorting.py | 2 +- pandas/tests/indexing/test_loc.py | 13 +++++++++ pandas/tests/tslibs/test_npy_units.py | 27 +++++++++++++++++++ 8 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 pandas/tests/tslibs/test_npy_units.py diff --git a/doc/redirects.csv b/doc/redirects.csv index 97cd20b295e65..bd60cc6a732bd 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -127,7 +127,6 @@ generated/pandas.api.types.is_number,../reference/api/pandas.api.types.is_number generated/pandas.api.types.is_numeric_dtype,../reference/api/pandas.api.types.is_numeric_dtype generated/pandas.api.types.is_object_dtype,../reference/api/pandas.api.types.is_object_dtype generated/pandas.api.types.is_period_dtype,../reference/api/pandas.api.types.is_period_dtype -generated/pandas.api.types.is_period,../reference/api/pandas.api.types.is_period generated/pandas.api.types.is_re_compilable,../reference/api/pandas.api.types.is_re_compilable generated/pandas.api.types.is_re,../reference/api/pandas.api.types.is_re generated/pandas.api.types.is_scalar,../reference/api/pandas.api.types.is_scalar diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index 41f4b4d5783ea..e0aa8be066914 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -528,7 +528,7 @@ If a test is known to fail but the manner in which it fails is not meant to be captured, use ``pytest.mark.xfail`` It is common to use this method for a test that exhibits buggy behavior or a non-implemented feature. 
If the failing test has flaky behavior, use the argument ``strict=False``. This -will make it so pytest does not fail if the test happens to pass. +will make it so pytest does not fail if the test happens to pass. Using ``strict=False`` is highly undesirable, please use it only as a last resort. Prefer the decorator ``@pytest.mark.xfail`` and the argument ``pytest.param`` over usage within a test so that the test is appropriately marked during the diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index 83f830bb11198..3c48d65f8bd8d 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -67,6 +67,8 @@ objects. api.extensions.ExtensionArray.ndim api.extensions.ExtensionArray.shape api.extensions.ExtensionArray.tolist + api.extensions.ExtensionArray.transpose + api.extensions.ExtensionArray.T Additionally, we have some utility methods for ensuring your object behaves correctly. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9017ff121976b..3ae5720f5c828 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3501,7 +3501,7 @@ def _intersection(self, other: Index, sort: bool = False): pass else: # TODO: algos.unique1d should preserve DTA/TDA - if is_numeric_dtype(self): + if is_numeric_dtype(self.dtype): # This is faster, because Index.unique() checks for uniqueness # before calculating the unique values. res = algos.unique1d(res_indexer) @@ -5013,7 +5013,10 @@ def _can_use_libjoin(self) -> bool: ) # Exclude index types where the conversion to numpy converts to object dtype, # which negates the performance benefit of libjoin - # TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone + # Subclasses should override to return False if _get_join_target is + # not zero-copy. + # TODO: exclude RangeIndex (which allocates memory)? 
+ # Doing so seems to break test_concat_datetime_timezone return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex)) # -------------------------------------------------------------------- @@ -6169,8 +6172,8 @@ def _get_indexer_non_comparable( If doing an inequality check, i.e. method is not None. """ if method is not None: - other = _unpack_nested_dtype(target) - raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}") + other_dtype = _unpack_nested_dtype(target) + raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}") no_matches = -1 * np.ones(target.shape, dtype=np.intp) if unique: @@ -6281,8 +6284,7 @@ def _should_compare(self, other: Index) -> bool: # respectively. return False - other = _unpack_nested_dtype(other) - dtype = other.dtype + dtype = _unpack_nested_dtype(other) return self._is_comparable_dtype(dtype) or is_object_dtype(dtype) def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: @@ -7585,7 +7587,7 @@ def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]: return names -def _unpack_nested_dtype(other: Index) -> Index: +def _unpack_nested_dtype(other: Index) -> DtypeObj: """ When checking if our dtype is comparable with another, we need to unpack CategoricalDtype to look at its categories.dtype. @@ -7596,20 +7598,20 @@ def _unpack_nested_dtype(other: Index) -> Index: Returns ------- - Index + np.dtype or ExtensionDtype """ dtype = other.dtype if isinstance(dtype, CategoricalDtype): # If there is ever a SparseIndex, this could get dispatched # here too. 
- return dtype.categories + return dtype.categories.dtype elif isinstance(dtype, ArrowDtype): # GH 53617 import pyarrow as pa if pa.types.is_dictionary(dtype.pyarrow_dtype): - other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type)) - return other + other = other[:0].astype(ArrowDtype(dtype.pyarrow_dtype.value_type)) + return other.dtype def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None): diff --git a/pandas/core/series.py b/pandas/core/series.py index d5785a2171cb3..7b12f4730d2fe 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4004,7 +4004,8 @@ def argsort( if mask.any(): # TODO(3.0): once this deprecation is enforced we can call - # self.array.argsort directly, which will close GH#43840 + # self.array.argsort directly, which will close GH#43840 and + # GH#12694 warnings.warn( "The behavior of Series.argsort in the presence of NA values is " "deprecated. In a future version, NA values will be ordered " diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index d96fc02e16d0d..1b1d9d7640058 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -88,7 +88,7 @@ def get_indexer_indexer( # error: Incompatible types in assignment (expression has type # "Union[ExtensionArray, ndarray[Any, Any], Index, Series]", variable has # type "Index") - target = ensure_key_mapped(target, key, levels=level) # type:ignore[assignment] + target = ensure_key_mapped(target, key, levels=level) # type: ignore[assignment] target = target._sort_levels_monotonic() if level is not None: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index a2693c85e507f..781bd646592ea 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2166,6 +2166,19 @@ def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype): result.loc[df.index, "data"] = ser._values tm.assert_frame_equal(result, df) + def test_loc_setitem_ea_not_full_column(self): + # GH#39163 + df = 
DataFrame({"A": range(5)}) + + val = date_range("2016-01-01", periods=3, tz="US/Pacific") + + df.loc[[0, 1, 2], "B"] = val + + bex = val.append(DatetimeIndex([pd.NaT, pd.NaT], dtype=val.dtype)) + expected = DataFrame({"A": range(5), "B": bex}) + assert expected.dtypes["B"] == val.dtype + tm.assert_frame_equal(df, expected) + class TestLocCallable: def test_frame_loc_getitem_callable(self): diff --git a/pandas/tests/tslibs/test_npy_units.py b/pandas/tests/tslibs/test_npy_units.py new file mode 100644 index 0000000000000..6d05dc79fbb2c --- /dev/null +++ b/pandas/tests/tslibs/test_npy_units.py @@ -0,0 +1,27 @@ +import numpy as np + +from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit +from pandas._libs.tslibs.vectorized import is_date_array_normalized + +# a datetime64 ndarray which *is* normalized +day_arr = np.arange(10, dtype="i8").view("M8[D]") + + +class TestIsDateArrayNormalized: + def test_is_date_array_normalized_day(self): + arr = day_arr + abbrev = "D" + unit = abbrev_to_npy_unit(abbrev) + result = is_date_array_normalized(arr.view("i8"), None, unit) + assert result is True + + def test_is_date_array_normalized_seconds(self): + abbrev = "s" + arr = day_arr.astype(f"M8[{abbrev}]") + unit = abbrev_to_npy_unit(abbrev) + result = is_date_array_normalized(arr.view("i8"), None, unit) + assert result is True + + arr[0] += np.timedelta64(1, abbrev) + result2 = is_date_array_normalized(arr.view("i8"), None, unit) + assert result2 is False From 939e4f863d46e574b139a37638041fa45714f53d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 3 Oct 2023 07:41:17 -0700 Subject: [PATCH 2/4] docstring fixup --- pandas/core/arrays/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 933944dbd4632..15448148f0c0c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1703,6 +1703,10 @@ def transpose(self, *axes: int) -> ExtensionArray: Because ExtensionArrays are always 1D, this is a 
no-op. It is included for compatibility with np.ndarray. + + Returns + ------- + ExtensionArray """ return self[:] From 34a827ec7518e6c4135c056f1228fb7457fe3559 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 3 Oct 2023 07:41:52 -0700 Subject: [PATCH 3/4] docstring fixup --- pandas/core/arrays/base.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 15448148f0c0c..1b9924193483e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1707,6 +1707,13 @@ def transpose(self, *axes: int) -> ExtensionArray: Returns ------- ExtensionArray + + Examples + -------- + >>> pd.array([1, 2, 3]).transpose() + <IntegerArray> + [1, 2, 3] + Length: 3, dtype: Int64 """ return self[:] From d829feefb2ca5ec6608ee76cae909b8b4bc721e5 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Oct 2023 08:29:07 -0700 Subject: [PATCH 4/4] revert doc --- doc/source/reference/extensions.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index 161cc4b46fffc..e412793a328a3 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -68,8 +68,6 @@ objects. api.extensions.ExtensionArray.ndim api.extensions.ExtensionArray.shape api.extensions.ExtensionArray.tolist - api.extensions.ExtensionArray.transpose - api.extensions.ExtensionArray.T Additionally, we have some utility methods for ensuring your object behaves correctly.