Skip to content

CLN: assorted #55359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/redirects.csv
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ generated/pandas.api.types.is_number,../reference/api/pandas.api.types.is_number
generated/pandas.api.types.is_numeric_dtype,../reference/api/pandas.api.types.is_numeric_dtype
generated/pandas.api.types.is_object_dtype,../reference/api/pandas.api.types.is_object_dtype
generated/pandas.api.types.is_period_dtype,../reference/api/pandas.api.types.is_period_dtype
generated/pandas.api.types.is_period,../reference/api/pandas.api.types.is_period
generated/pandas.api.types.is_re_compilable,../reference/api/pandas.api.types.is_re_compilable
generated/pandas.api.types.is_re,../reference/api/pandas.api.types.is_re
generated/pandas.api.types.is_scalar,../reference/api/pandas.api.types.is_scalar
Expand Down
2 changes: 1 addition & 1 deletion doc/source/development/contributing_codebase.rst
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ If a test is known to fail but the manner in which it fails
is not meant to be captured, use ``pytest.mark.xfail``. It is common to use this method for a test that
exhibits buggy behavior or a non-implemented feature. If
the failing test has flaky behavior, use the argument ``strict=False``. This
will make it so pytest does not fail if the test happens to pass.
will make it so pytest does not fail if the test happens to pass. Using ``strict=False`` is highly undesirable; please use it only as a last resort.

Prefer the decorator ``@pytest.mark.xfail`` and the argument ``pytest.param``
over usage within a test so that the test is appropriately marked during the
Expand Down
11 changes: 11 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1729,6 +1729,17 @@ def transpose(self, *axes: int) -> ExtensionArray:

Because ExtensionArrays are always 1D, this is a no-op. It is included
for compatibility with np.ndarray.

Returns
-------
ExtensionArray

Examples
--------
>>> pd.array([1, 2, 3]).transpose()
<IntegerArray>
[1, 2, 3]
Length: 3, dtype: Int64
"""
return self[:]

Expand Down
24 changes: 13 additions & 11 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3501,7 +3501,7 @@ def _intersection(self, other: Index, sort: bool = False):
pass
else:
# TODO: algos.unique1d should preserve DTA/TDA
if is_numeric_dtype(self):
if is_numeric_dtype(self.dtype):
# This is faster, because Index.unique() checks for uniqueness
# before calculating the unique values.
res = algos.unique1d(res_indexer)
Expand Down Expand Up @@ -5020,7 +5020,10 @@ def _can_use_libjoin(self) -> bool:
)
# Exclude index types where the conversion to numpy converts to object dtype,
# which negates the performance benefit of libjoin
# TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
# Subclasses should override to return False if _get_join_target is
# not zero-copy.
# TODO: exclude RangeIndex (which allocates memory)?
# Doing so seems to break test_concat_datetime_timezone
return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex))

# --------------------------------------------------------------------
Expand Down Expand Up @@ -6176,8 +6179,8 @@ def _get_indexer_non_comparable(
If doing an inequality check, i.e. method is not None.
"""
if method is not None:
other = _unpack_nested_dtype(target)
raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
other_dtype = _unpack_nested_dtype(target)
raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}")

no_matches = -1 * np.ones(target.shape, dtype=np.intp)
if unique:
Expand Down Expand Up @@ -6288,8 +6291,7 @@ def _should_compare(self, other: Index) -> bool:
# respectively.
return False

other = _unpack_nested_dtype(other)
dtype = other.dtype
dtype = _unpack_nested_dtype(other)
return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
Expand Down Expand Up @@ -7592,7 +7594,7 @@ def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
return names


def _unpack_nested_dtype(other: Index) -> Index:
def _unpack_nested_dtype(other: Index) -> DtypeObj:
"""
When checking if our dtype is comparable with another, we need
to unpack CategoricalDtype to look at its categories.dtype.
Expand All @@ -7603,20 +7605,20 @@ def _unpack_nested_dtype(other: Index) -> Index:

Returns
-------
Index
np.dtype or ExtensionDtype
"""
dtype = other.dtype
if isinstance(dtype, CategoricalDtype):
# If there is ever a SparseIndex, this could get dispatched
# here too.
return dtype.categories
return dtype.categories.dtype
elif isinstance(dtype, ArrowDtype):
# GH 53617
import pyarrow as pa

if pa.types.is_dictionary(dtype.pyarrow_dtype):
other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
return other
other = other[:0].astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
return other.dtype


def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4004,7 +4004,8 @@ def argsort(

if mask.any():
# TODO(3.0): once this deprecation is enforced we can call
# self.array.argsort directly, which will close GH#43840
# self.array.argsort directly, which will close GH#43840 and
# GH#12694
warnings.warn(
"The behavior of Series.argsort in the presence of NA values is "
"deprecated. In a future version, NA values will be ordered "
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def get_indexer_indexer(
# error: Incompatible types in assignment (expression has type
# "Union[ExtensionArray, ndarray[Any, Any], Index, Series]", variable has
# type "Index")
target = ensure_key_mapped(target, key, levels=level) # type:ignore[assignment]
target = ensure_key_mapped(target, key, levels=level) # type: ignore[assignment]
target = target._sort_levels_monotonic()

if level is not None:
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2166,6 +2166,19 @@ def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype):
result.loc[df.index, "data"] = ser._values
tm.assert_frame_equal(result, df)

def test_loc_setitem_ea_not_full_column(self):
# GH#39163
df = DataFrame({"A": range(5)})

val = date_range("2016-01-01", periods=3, tz="US/Pacific")

df.loc[[0, 1, 2], "B"] = val

bex = val.append(DatetimeIndex([pd.NaT, pd.NaT], dtype=val.dtype))
expected = DataFrame({"A": range(5), "B": bex})
assert expected.dtypes["B"] == val.dtype
tm.assert_frame_equal(df, expected)


class TestLocCallable:
def test_frame_loc_getitem_callable(self):
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/tslibs/test_npy_units.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np

from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
from pandas._libs.tslibs.vectorized import is_date_array_normalized

# a datetime64 ndarray which *is* normalized
# A datetime64 ndarray that *is* normalized: day-resolution values carry
# no sub-day component by construction.
day_arr = np.arange(10, dtype="i8").view("M8[D]")


class TestIsDateArrayNormalized:
    """Exercise is_date_array_normalized across numpy datetime resolutions."""

    def test_is_date_array_normalized_day(self):
        # Day-resolution data cannot have a time-of-day component.
        reso = abbrev_to_npy_unit("D")
        assert is_date_array_normalized(day_arr.view("i8"), None, reso) is True

    def test_is_date_array_normalized_seconds(self):
        abbrev = "s"
        sec_arr = day_arr.astype(f"M8[{abbrev}]")
        reso = abbrev_to_npy_unit(abbrev)
        assert is_date_array_normalized(sec_arr.view("i8"), None, reso) is True

        # Nudging one element off midnight makes the array non-normalized.
        sec_arr[0] += np.timedelta64(1, abbrev)
        assert is_date_array_normalized(sec_arr.view("i8"), None, reso) is False