-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Handle extension arrays in algorithms.diff #31025
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fcde96b
7c5e6f7
3cc7c11
5017912
dfea6a5
38fe40c
fc6eef0
84e5e93
4183b5b
ab9b23f
2f5d55f
e0ce8be
bd18da2
1c0a9fe
f3af8f5
4d0c5cf
6843e2b
bd6c157
7861f57
a496f13
869ce96
8fa2836
d34ffe3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
Generic data algorithms. This module is experimental at the moment and not | ||
intended for public consumption | ||
""" | ||
import operator | ||
from textwrap import dedent | ||
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union | ||
from warnings import catch_warnings, simplefilter, warn | ||
|
@@ -1812,7 +1813,7 @@ def searchsorted(arr, value, side="left", sorter=None): | |
_diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} | ||
|
||
|
||
def diff(arr, n: int, axis: int = 0): | ||
def diff(arr, n: int, axis: int = 0, stacklevel=3): | ||
""" | ||
difference of n between self, | ||
analogous to s-s.shift(n) | ||
|
@@ -1824,16 +1825,42 @@ def diff(arr, n: int, axis: int = 0): | |
number of periods | ||
axis : int | ||
axis to shift on | ||
stacklevel : int | ||
The stacklevel for the lost dtype warning. | ||
|
||
Returns | ||
------- | ||
shifted | ||
""" | ||
from pandas.core.arrays import PandasDtype | ||
|
||
n = int(n) | ||
na = np.nan | ||
dtype = arr.dtype | ||
|
||
if dtype.kind == "b": | ||
op = operator.xor | ||
else: | ||
op = operator.sub | ||
|
||
if isinstance(dtype, PandasDtype): | ||
# PandasArray cannot necessarily hold shifted versions of itself. | ||
arr = np.asarray(arr) | ||
dtype = arr.dtype | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would it be more idiomatic to do `extract_array` up front? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would say not, because we don't need to "extract" the array: arrays are already passed (this doesn't get passed Series or Index objects). You can of course use |
||
|
||
if is_extension_array_dtype(dtype): | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if hasattr(arr, f"__{op.__name__}__"): | ||
return op(arr, arr.shift(n)) | ||
else: | ||
warn( | ||
"dtype lost in 'diff()'. In the future this will raise a " | ||
"TypeError. Convert to a suitable dtype prior to calling 'diff'.", | ||
FutureWarning, | ||
stacklevel=stacklevel, | ||
) | ||
arr = np.asarray(arr) | ||
dtype = arr.dtype | ||
|
||
is_timedelta = False | ||
is_bool = False | ||
if needs_i8_conversion(arr): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1280,7 +1280,10 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): | |
|
||
def diff(self, n: int, axis: int = 1) -> List["Block"]: | ||
""" return block for the diff of the values """ | ||
new_values = algos.diff(self.values, n, axis=axis) | ||
new_values = algos.diff(self.values, n, axis=axis, stacklevel=7) | ||
# We use block_shape for ExtensionBlock subclasses, which may call here | ||
# via a super. | ||
new_values = _block_shape(new_values, ndim=self.ndim) | ||
return [self.make_block(values=new_values)] | ||
|
||
def shift(self, periods, axis=0, fill_value=None): | ||
|
@@ -1860,6 +1863,12 @@ def interpolate( | |
placement=self.mgr_locs, | ||
) | ||
|
||
def diff(self, n: int, axis: int = 1) -> List["Block"]: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that this is on `ExtensionBlock` now. The deprecation for EAs not implementing |
||
if axis == 1: | ||
# we are by definition 1D. | ||
axis = 0 | ||
return super().diff(n, axis) | ||
|
||
def shift( | ||
self, | ||
periods: int, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -248,6 +248,10 @@ def test_repeat(self, data, repeats, as_series, use_numpy): | |
# Fails creating expected | ||
super().test_repeat(data, repeats, as_series, use_numpy) | ||
|
||
@pytest.mark.xfail(reason="PandasArray.diff may fail on dtype") | ||
def test_diff(self, data, periods): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this skipped? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Either a bug or not-implemented behavior in PandasArray. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe xfail it then? |
||
return super().test_diff(data, periods) | ||
|
||
|
||
@skip_nested | ||
class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): | ||
|
Uh oh!
There was an error while loading. Please reload this page.