-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
ENH: fill_value argument for shift #15486 #24128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 78 commits
c3e550c
9ab2d4d
6b4789d
57c087e
1852ea6
3461271
8b5cedb
27226fa
29b8a6e
cf6b2dd
9f9962c
4c7b762
b36ca43
a36e7c7
4c2ec2c
fbf3d73
bb722a6
1b1fdc2
c3f462c
e44b514
38f621c
d9d9fdb
cc6f370
3619410
ca5ba24
c195a13
af90a00
2f9b712
bbfe7f9
ef55afb
ce3d3c7
e6103a4
d18b6b7
931df66
83f9157
81c01bc
4ae4d0b
d4d43a3
1366161
2643aa5
c010413
689bf8e
463964b
b90b00f
18fab2f
cb03215
c8242f3
8e946cc
69e47f3
8765bee
9d10a6b
6f4078a
4227dda
ce721ae
c743004
03e3bd4
e4313da
85d2b16
da90e89
d71be8a
aca6c9c
20361c7
599ccb7
1ac273a
b2074a8
0fe2f95
7d33f21
578859b
a016df3
3947394
b266a50
fa808a4
71b8df1
31a844e
18cbd95
d068150
e665f7d
c5a95cb
b03a3fd
d9efb45
60595c9
7fc9900
e1a83e2
7b587c2
7bf6768
09f0fde
6d65cfa
2162c18
3f9b62b
8fa8a15
988507a
6c96108
eb48cfe
a72cffe
25b4661
d817d0c
0afad10
bb67905
f5aad59
025f0db
8ce460a
dce5aa1
d60632a
a0ab35d
fd23842
3503e86
674f15d
3c896ab
b9d335a
4fcca01
a24f5c8
a92ddfb
a03cbf5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -457,7 +457,7 @@ def value_counts(self, dropna=False): | |
|
||
# -------------------------------------------------------------------- | ||
|
||
def shift(self, periods=1): | ||
def shift(self, periods=1, fill_value=None): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
Shift values by desired number. | ||
|
||
|
@@ -471,6 +471,8 @@ def shift(self, periods=1): | |
periods : int, default 1 | ||
The number of periods to shift. Negative values are allowed | ||
for shifting backwards. | ||
fill_value : optional, default NaT | ||
.. versionadded:: 0.24.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. need a blank line here |
||
|
||
Returns | ||
------- | ||
|
@@ -479,7 +481,7 @@ def shift(self, periods=1): | |
# TODO(DatetimeArray): remove | ||
# The semantics for Index.shift differ from EA.shift | ||
# then just call super. | ||
return ExtensionArray.shift(self, periods) | ||
return ExtensionArray.shift(self, periods, fill_value=fill_value) | ||
|
||
def _time_shift(self, n, freq=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8839,6 +8839,12 @@ def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, | |
extend the index when shifting and preserve the original data. | ||
axis : {0 or 'index', 1 or 'columns', None}, default None | ||
Shift direction. | ||
fill_value : object, optional | ||
The scalar value to use for newly introduced missing values. | ||
The default depends on the dtype of `self`. For numeric data, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Capitalize There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
``np.nan`` is used. For datetime-like data, ``pandas.NaT`` is used. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Capitalize. Maybe replace datelike with "datetime, timedelta, or period data, ..." change ``pandas.nat`` to :attr:`NaT`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And could you add that "For extension dtypes, There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
.. versionchanged:: 0.24.0 | ||
|
||
Returns | ||
------- | ||
|
@@ -8874,16 +8880,25 @@ def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, | |
2 NaN 15.0 18.0 | ||
3 NaN 30.0 33.0 | ||
4 NaN 45.0 48.0 | ||
|
||
>>> df.shift(periods=3, fill_value=0.0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use a fill_value=0, so these stay as ints |
||
Col1 Col2 Col3 | ||
0 0.0 0.0 0.0 | ||
1 0.0 0.0 0.0 | ||
2 0.0 0.0 0.0 | ||
3 10.0 13.0 17.0 | ||
4 20.0 23.0 27.0 | ||
""") | ||
|
||
@Appender(_shared_docs['shift'] % _shared_doc_kwargs) | ||
def shift(self, periods=1, freq=None, axis=0): | ||
def shift(self, periods=1, freq=None, axis=0, fill_value=None): | ||
if periods == 0: | ||
return self.copy() | ||
|
||
block_axis = self._get_block_manager_axis(axis) | ||
if freq is None: | ||
new_data = self._data.shift(periods=periods, axis=block_axis) | ||
new_data = self._data.shift(periods=periods, axis=block_axis, | ||
fill_value=fill_value) | ||
else: | ||
return self.tshift(periods, freq) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1992,7 +1992,7 @@ def _get_cythonized_result(self, how, grouper, aggregate=False, | |
|
||
@Substitution(name='groupby') | ||
@Appender(_doc_template) | ||
def shift(self, periods=1, freq=None, axis=0): | ||
def shift(self, periods=1, freq=None, axis=0, fill_value=None): | ||
""" | ||
Shift each group by periods observations. | ||
|
||
|
@@ -2002,10 +2002,13 @@ def shift(self, periods=1, freq=None, axis=0): | |
number of periods to shift | ||
freq : frequency string | ||
axis : axis to shift, default 0 | ||
fill_value : optional | ||
.. versionadded:: 0.24.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. blank line here |
||
""" | ||
|
||
if freq is not None or axis != 0: | ||
return self.apply(lambda x: x.shift(periods, freq, axis)) | ||
if freq is not None or axis != 0 or fill_value is not None: | ||
return self.apply(lambda x: x.shift(periods, freq, | ||
axis, fill_value)) | ||
|
||
return self._get_cythonized_result('group_shift_indexer', | ||
self.grouper, cython_dtype=np.int64, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1260,12 +1260,12 @@ def diff(self, n, axis=1): | |
new_values = algos.diff(self.values, n, axis=axis) | ||
return [self.make_block(values=new_values)] | ||
|
||
def shift(self, periods, axis=0): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def shift(self, periods, axis=0, fill_value=None): | ||
""" shift the block by periods, possibly upcast """ | ||
|
||
# convert integer to float if necessary. need to do a lot more than | ||
# that, handle boolean etc also | ||
new_values, fill_value = maybe_upcast(self.values) | ||
new_values, fill_value = maybe_upcast(self.values, fill_value) | ||
|
||
# make sure array sent to np.roll is c_contiguous | ||
f_ordered = new_values.flags.f_contiguous | ||
|
@@ -1955,17 +1955,19 @@ def interpolate(self, method='pad', axis=0, inplace=False, limit=None, | |
limit=limit), | ||
placement=self.mgr_locs) | ||
|
||
def shift(self, periods, axis=0): | ||
def shift(self, periods, axis=0, fill_value=None): | ||
""" | ||
Shift the block by `periods`. | ||
|
||
Dispatches to underlying ExtensionArray and re-boxes in an | ||
ExtensionBlock. | ||
""" | ||
# type: (int, Optional[BlockPlacement]) -> List[ExtensionBlock] | ||
return [self.make_block_same_class(self.values.shift(periods=periods), | ||
placement=self.mgr_locs, | ||
ndim=self.ndim)] | ||
return [ | ||
self.make_block_same_class( | ||
self.values.shift(periods=periods, fill_value=fill_value), | ||
placement=self.mgr_locs, ndim=self.ndim) | ||
] | ||
|
||
@property | ||
def _ftype(self): | ||
|
@@ -2945,7 +2947,7 @@ def _try_coerce_result(self, result): | |
def _box_func(self): | ||
return lambda x: tslibs.Timestamp(x, tz=self.dtype.tz) | ||
|
||
def shift(self, periods, axis=0): | ||
def shift(self, periods, axis=0, fill_value=None): | ||
""" shift the block by periods """ | ||
|
||
# think about moving this to the DatetimeIndex. This is a non-freq | ||
|
@@ -2960,10 +2962,12 @@ def shift(self, periods, axis=0): | |
|
||
new_values = self.values.asi8.take(indexer) | ||
|
||
if fill_value is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use |
||
fill_value = tslibs.iNaT | ||
if periods > 0: | ||
new_values[:periods] = tslibs.iNaT | ||
new_values[:periods] = fill_value | ||
else: | ||
new_values[periods:] = tslibs.iNaT | ||
new_values[periods:] = fill_value | ||
|
||
new_values = self.values._shallow_copy(new_values) | ||
return [self.make_block_same_class(new_values, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -262,6 +262,20 @@ def test_take_negative(self): | |
exp = SparseArray(np.take(self.arr_data, [-4, -3, -2])) | ||
tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp) | ||
|
||
def test_shift_fill_value(self): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# GH #24128 | ||
fill_values = [0, None, np.nan] | ||
for fill_value in fill_values: | ||
print(fill_value) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. remove the print. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. removed |
||
sparse = SparseArray(np.array([1, 0, 0, 3, 0]), | ||
fill_value=8.0) | ||
res = sparse.shift(1, fill_value=fill_value) | ||
if fill_value is None: | ||
fill_value = res.dtype.na_value | ||
exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), | ||
fill_value=8.0) | ||
tm.assert_sp_array_equal(res, exp) | ||
|
||
def test_bad_take(self): | ||
with pytest.raises(IndexError, match="bounds"): | ||
self.arr.take([11]) | ||
|
Uh oh!
There was an error while loading. Please reload this page.