diff --git a/doc/redirects.csv b/doc/redirects.csv
index 97cd20b295e65..bd60cc6a732bd 100644
--- a/doc/redirects.csv
+++ b/doc/redirects.csv
@@ -127,7 +127,6 @@ generated/pandas.api.types.is_number,../reference/api/pandas.api.types.is_number
 generated/pandas.api.types.is_numeric_dtype,../reference/api/pandas.api.types.is_numeric_dtype
 generated/pandas.api.types.is_object_dtype,../reference/api/pandas.api.types.is_object_dtype
 generated/pandas.api.types.is_period_dtype,../reference/api/pandas.api.types.is_period_dtype
-generated/pandas.api.types.is_period,../reference/api/pandas.api.types.is_period
 generated/pandas.api.types.is_re_compilable,../reference/api/pandas.api.types.is_re_compilable
 generated/pandas.api.types.is_re,../reference/api/pandas.api.types.is_re
 generated/pandas.api.types.is_scalar,../reference/api/pandas.api.types.is_scalar
diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst
index 41f4b4d5783ea..e0aa8be066914 100644
--- a/doc/source/development/contributing_codebase.rst
+++ b/doc/source/development/contributing_codebase.rst
@@ -528,7 +528,7 @@ If a test is known to fail but the manner in which it fails is not meant to be
 captured, use ``pytest.mark.xfail`` It is common to use this method for a test that
 exhibits buggy behavior or a non-implemented feature. If the failing test has flaky
 behavior, use the argument ``strict=False``. This
-will make it so pytest does not fail if the test happens to pass.
+will make it so pytest does not fail if the test happens to pass. Using ``strict=False`` is highly undesirable, please use it only as a last resort.
 
 Prefer the decorator ``@pytest.mark.xfail`` and the argument ``pytest.param``
 over usage within a test so that the test is appropriately marked during the
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 177c105688d0c..31c143ee012bb 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1729,6 +1729,17 @@ def transpose(self, *axes: int) -> ExtensionArray:
 
         Because ExtensionArrays are always 1D, this is a no-op.  It is
         included for compatibility with np.ndarray.
+
+        Returns
+        -------
+        ExtensionArray
+
+        Examples
+        --------
+        >>> pd.array([1, 2, 3]).transpose()
+        <IntegerArray>
+        [1, 2, 3]
+        Length: 3, dtype: Int64
         """
         return self[:]
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 11f2cc8ebf1ff..cb22ad456c9b5 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3501,7 +3501,7 @@ def _intersection(self, other: Index, sort: bool = False):
                 pass
             else:
                 # TODO: algos.unique1d should preserve DTA/TDA
-                if is_numeric_dtype(self):
+                if is_numeric_dtype(self.dtype):
                     # This is faster, because Index.unique() checks for uniqueness
                     # before calculating the unique values.
                     res = algos.unique1d(res_indexer)
@@ -5020,7 +5020,10 @@ def _can_use_libjoin(self) -> bool:
         )
         # Exclude index types where the conversion to numpy converts to object dtype,
         # which negates the performance benefit of libjoin
-        # TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
+        # Subclasses should override to return False if _get_join_target is
+        # not zero-copy.
+        # TODO: exclude RangeIndex (which allocates memory)?
+        # Doing so seems to break test_concat_datetime_timezone
         return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex))
 
     # --------------------------------------------------------------------
@@ -6176,8 +6179,8 @@ def _get_indexer_non_comparable(
             If doing an inequality check, i.e. method is not None.
         """
         if method is not None:
-            other = _unpack_nested_dtype(target)
-            raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
+            other_dtype = _unpack_nested_dtype(target)
+            raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}")
 
         no_matches = -1 * np.ones(target.shape, dtype=np.intp)
         if unique:
@@ -6288,8 +6291,7 @@ def _should_compare(self, other: Index) -> bool:
             # respectively.
             return False
 
-        other = _unpack_nested_dtype(other)
-        dtype = other.dtype
+        dtype = _unpack_nested_dtype(other)
         return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
 
     def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
@@ -7592,7 +7594,7 @@ def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
     return names
 
 
-def _unpack_nested_dtype(other: Index) -> Index:
+def _unpack_nested_dtype(other: Index) -> DtypeObj:
     """
     When checking if our dtype is comparable with another, we need to unpack
     CategoricalDtype to look at its categories.dtype.
@@ -7603,20 +7605,20 @@ def _unpack_nested_dtype(other: Index) -> Index:
 
     Returns
     -------
-    Index
+    np.dtype or ExtensionDtype
     """
     dtype = other.dtype
     if isinstance(dtype, CategoricalDtype):
         # If there is ever a SparseIndex, this could get dispatched
         # here too.
-        return dtype.categories
+        return dtype.categories.dtype
     elif isinstance(dtype, ArrowDtype):
         # GH 53617
         import pyarrow as pa
 
         if pa.types.is_dictionary(dtype.pyarrow_dtype):
-            other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
-    return other
+            other = other[:0].astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
+    return other.dtype
 
 
 def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
diff --git a/pandas/core/series.py b/pandas/core/series.py
index a5e692431e890..417a2515a33f0 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4004,7 +4004,8 @@ def argsort(
 
         if mask.any():
             # TODO(3.0): once this deprecation is enforced we can call
-            # self.array.argsort directly, which will close GH#43840
+            # self.array.argsort directly, which will close GH#43840 and
+            # GH#12694
             warnings.warn(
                 "The behavior of Series.argsort in the presence of NA values is "
                 "deprecated. In a future version, NA values will be ordered "
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index d96fc02e16d0d..1b1d9d7640058 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -88,7 +88,7 @@ def get_indexer_indexer(
     # error: Incompatible types in assignment (expression has type
     # "Union[ExtensionArray, ndarray[Any, Any], Index, Series]", variable has
     # type "Index")
-    target = ensure_key_mapped(target, key, levels=level)  # type:ignore[assignment]
+    target = ensure_key_mapped(target, key, levels=level)  # type: ignore[assignment]
     target = target._sort_levels_monotonic()
 
     if level is not None:
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 59e0ca0591ced..2eee506e1feb7 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -2166,6 +2166,19 @@ def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype):
         result.loc[df.index, "data"] = ser._values
         tm.assert_frame_equal(result, df)
 
+    def test_loc_setitem_ea_not_full_column(self):
+        # GH#39163
+        df = DataFrame({"A": range(5)})
+
+        val = date_range("2016-01-01", periods=3, tz="US/Pacific")
+
+        df.loc[[0, 1, 2], "B"] = val
+
+        bex = val.append(DatetimeIndex([pd.NaT, pd.NaT], dtype=val.dtype))
+        expected = DataFrame({"A": range(5), "B": bex})
+        assert expected.dtypes["B"] == val.dtype
+        tm.assert_frame_equal(df, expected)
+
 
 class TestLocCallable:
     def test_frame_loc_getitem_callable(self):
diff --git a/pandas/tests/tslibs/test_npy_units.py b/pandas/tests/tslibs/test_npy_units.py
new file mode 100644
index 0000000000000..6d05dc79fbb2c
--- /dev/null
+++ b/pandas/tests/tslibs/test_npy_units.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
+from pandas._libs.tslibs.vectorized import is_date_array_normalized
+
+# a datetime64 ndarray which *is* normalized
+day_arr = np.arange(10, dtype="i8").view("M8[D]")
+
+
+class TestIsDateArrayNormalized:
+    def test_is_date_array_normalized_day(self):
+        arr = day_arr
+        abbrev = "D"
+        unit = abbrev_to_npy_unit(abbrev)
+        result = is_date_array_normalized(arr.view("i8"), None, unit)
+        assert result is True
+
+    def test_is_date_array_normalized_seconds(self):
+        abbrev = "s"
+        arr = day_arr.astype(f"M8[{abbrev}]")
+        unit = abbrev_to_npy_unit(abbrev)
+        result = is_date_array_normalized(arr.view("i8"), None, unit)
+        assert result is True
+
+        arr[0] += np.timedelta64(1, abbrev)
+        result2 = is_date_array_normalized(arr.view("i8"), None, unit)
+        assert result2 is False