Skip to content

Commit 6955de6

Browse files
committed
BUG: CategoricalBlock shift GH9416
CategoricalBlocks always seem to have ndim=1, even if multiple categoricals are in a frame with the same categories. This simplifies the axis shift logic somewhat.
1 parent bbec57d commit 6955de6

File tree

6 files changed

+83
-0
lines changed

6 files changed

+83
-0
lines changed

doc/source/whatsnew/v0.17.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ Bug Fixes
128128

129129

130130
- Bug in ``test_categorical`` on big-endian builds (:issue:`10425`)
131+
- Bug in ``Series.shift`` and ``DataFrame.shift`` not supporting categorical data (:issue:`9416`)
131132
- Bug in ``Series.map`` using categorical ``Series`` raises ``AttributeError`` (:issue:`10324`)
132133
- Bug in ``MultiIndex.get_level_values`` including ``Categorical`` raises ``AttributeError`` (:issue:`10460`)
133134

pandas/core/categorical.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,35 @@ def shape(self):
820820

821821
return tuple([len(self._codes)])
822822

823+
def shift(self, periods):
    """
    Shift the Categorical by ``periods`` positions.

    Positions vacated by the shift receive the code -1; the categories
    and the ``ordered`` flag are passed through unchanged.

    Parameters
    ----------
    periods : int
        Number of positions to move, can be positive or negative

    Returns
    -------
    shifted : Categorical

    Raises
    ------
    NotImplementedError
        If the underlying codes have more than one dimension.
    """
    # Only one-dimensional codes are handled, so an axis parameter
    # doesn't make any sense here.
    codes = self.codes
    if codes.ndim > 1:
        raise NotImplementedError("Categorical with ndim > 1.")

    # Empty codes or a zero shift are passed through untouched.
    if np.prod(codes.shape) and periods != 0:
        # np.roll wraps values around the ends, so the wrapped-around
        # positions are overwritten with -1 afterwards.
        codes = np.roll(codes, com._ensure_platform_int(periods), axis=0)
        vacated = slice(None, periods) if periods > 0 else slice(periods, None)
        codes[vacated] = -1

    return Categorical.from_codes(codes,
                                  categories=self.categories,
                                  ordered=self.ordered)
851+
823852
def __array__(self, dtype=None):
824853
"""
825854
The numpy array interface.

pandas/core/internals.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1709,6 +1709,10 @@ def interpolate(self, method='pad', axis=0, inplace=False,
17091709
limit=limit),
17101710
placement=self.mgr_locs)
17111711

1712+
def shift(self, periods, axis=0):
    """
    Return a block of the same class holding the shifted values.

    The shift itself is delegated to ``self.values.shift(periods)``;
    ``axis`` is accepted but not used, and the block keeps its current
    ``mgr_locs`` placement.
    """
    shifted_values = self.values.shift(periods)
    return self.make_block_same_class(values=shifted_values,
                                      placement=self.mgr_locs)
1715+
17121716
def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
17131717
"""
17141718
Take values according to indexer and return them as a block.

pandas/tests/test_categorical.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,26 @@ def test_set_item_nan(self):
10801080
exp = np.array([0,1,3,2])
10811081
self.assert_numpy_array_equal(cat.codes, exp)
10821082

1083+
def test_shift(self):
    """Check Categorical.shift forward, backward and by zero (GH 9416)."""
    original = pd.Categorical(['a', 'b', 'c', 'd', 'a'])

    # Forward by one: the first slot becomes NaN and the rest move down.
    forward = original.shift(1)
    expected_forward = pd.Categorical([np.nan, 'a', 'b', 'c', 'd'])
    self.assert_categorical_equal(forward, expected_forward)
    self.assert_categorical_equal(original[:-1], forward[1:])

    # Backward by two: the last two slots become NaN.
    backward = original.shift(-2)
    expected_backward = pd.Categorical(['c', 'd', 'a', np.nan, np.nan],
                                       categories=['a', 'b', 'c', 'd'])
    self.assert_categorical_equal(backward, expected_backward)
    self.assert_categorical_equal(original[2:], backward[:-2])

    # A zero shift leaves the values as they were.
    unshifted = original.shift(0)
    self.assert_categorical_equal(original, unshifted)
1102+
10831103
def test_nbytes(self):
10841104
cat = pd.Categorical([1,2,3])
10851105
exp = cat._codes.nbytes + cat._categories.values.nbytes

pandas/tests/test_frame.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10360,6 +10360,15 @@ def test_shift_bool(self):
1036010360
columns=['high', 'low'])
1036110361
assert_frame_equal(rs, xp)
1036210362

10363+
def test_shift_categorical(self):
    """Shifting a frame of categoricals matches shifting each column (GH 9416)."""
    lower = pd.Series(['a', 'b', 'c'], dtype='category')
    upper = pd.Series(['A', 'B', 'C'], dtype='category')
    frame = DataFrame({'one': lower, 'two': upper})

    result = frame.shift(1)

    # Expected frame is built from the individually shifted columns.
    expected = DataFrame({'one': lower.shift(1), 'two': upper.shift(1)})
    assert_frame_equal(result, expected)
10371+
1036310372
def test_shift_empty(self):
1036410373
# Regression test for #8019
1036510374
df = DataFrame({'foo': []})

pandas/tests/test_series.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from pandas.util.testing import (assert_series_equal,
3636
assert_almost_equal,
3737
assert_frame_equal,
38+
assert_index_equal,
3839
ensure_clean)
3940
import pandas.util.testing as tm
4041

@@ -5260,6 +5261,25 @@ def test_shift_int(self):
52605261
expected = ts.astype(float).shift(1)
52615262
assert_series_equal(shifted, expected)
52625263

5264+
def test_shift_categorical(self):
    """Check Series.shift on categorical data (GH 9416)."""
    s = pd.Series(['a', 'b', 'c', 'd'], dtype='category')

    # Shifting forward then back drops only the last element.
    round_trip = s.shift(1).shift(-1).valid()
    assert_series_equal(s.iloc[:-1], round_trip)

    # Forward by one: index kept, first code is -1, the rest move down.
    forward = s.shift(1)
    assert_index_equal(s.index, forward.index)
    self.assertTrue(np.all(forward.values.codes[:1] == -1))
    self.assertTrue(
        np.all(s.values.codes[:-1] == forward.values.codes[1:]))

    # Backward by two: index kept, last two codes are -1.
    backward = s.shift(-2)
    assert_index_equal(s.index, backward.index)
    self.assertTrue(np.all(backward.values.codes[-2:] == -1))
    self.assertTrue(
        np.all(s.values.codes[2:] == backward.values.codes[:-2]))

    # Categories are the same after a shift in either direction.
    assert_index_equal(s.values.categories, forward.values.categories)
    assert_index_equal(s.values.categories, backward.values.categories)
5282+
52635283
def test_truncate(self):
52645284
offset = datetools.bday
52655285

0 commit comments

Comments
 (0)