Skip to content

Commit 47a2e38

Browse files
committed
BUG: fix CategoricalBlock pickling
* TST: add categorical frame and series to generate_legacy_pickles
* TST: generate pickle for 0.15.0
1 parent a05b8ed commit 47a2e38

File tree

5 files changed

+26
-28
lines changed

5 files changed

+26
-28
lines changed

doc/source/v0.15.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,3 +1071,4 @@ Bug Fixes
10711071
- Regression in ``NDFrame.loc`` indexing when rows/columns were converted to Float64Index if target was an empty list/ndarray (:issue:`7774`)
10721072
- Bug in ``Series`` that allowed it to be indexed by a ``DataFrame``, which produced unexpected results. Such indexing is no longer permitted (:issue:`8444`)
10731073
- Bug in item assignment of a ``DataFrame`` with multi-index columns where right-hand-side columns were not aligned (:issue:`7655`)
1074+
- Bug in unpickling of categorical ``Series`` and ``DataFrame`` columns (:issue:`8518`)

pandas/core/internals.py

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,16 +1070,19 @@ class NonConsolidatableMixIn(object):
10701070
def __init__(self, values, placement,
10711071
ndim=None, fastpath=False,):
10721072

1073+
# Placement must be converted to BlockPlacement via property setter
1074+
# before ndim logic, because placement may be a slice which doesn't
1075+
# have a length.
1076+
self.mgr_locs = placement
1077+
10731078
# kludgetastic
10741079
if ndim is None:
1075-
if len(placement) != 1:
1080+
if len(self.mgr_locs) != 1:
10761081
ndim = 1
10771082
else:
10781083
ndim = 2
10791084
self.ndim = ndim
10801085

1081-
self.mgr_locs = placement
1082-
10831086
if not isinstance(values, self._holder):
10841087
raise TypeError("values must be {0}".format(self._holder.__name__))
10851088

@@ -1852,6 +1855,7 @@ def get_values(self, dtype=None):
18521855
.reshape(self.values.shape)
18531856
return self.values
18541857

1858+
18551859
class SparseBlock(NonConsolidatableMixIn, Block):
18561860
""" implement as a list of sparse arrays of the same dtype """
18571861
__slots__ = ()
@@ -1861,27 +1865,6 @@ class SparseBlock(NonConsolidatableMixIn, Block):
18611865
_ftype = 'sparse'
18621866
_holder = SparseArray
18631867

1864-
def __init__(self, values, placement,
1865-
ndim=None, fastpath=False,):
1866-
1867-
# Placement must be converted to BlockPlacement via property setter
1868-
# before ndim logic, because placement may be a slice which doesn't
1869-
# have a length.
1870-
self.mgr_locs = placement
1871-
1872-
# kludgetastic
1873-
if ndim is None:
1874-
if len(self.mgr_locs) != 1:
1875-
ndim = 1
1876-
else:
1877-
ndim = 2
1878-
self.ndim = ndim
1879-
1880-
if not isinstance(values, SparseArray):
1881-
raise TypeError("values must be SparseArray")
1882-
1883-
self.values = values
1884-
18851868
@property
18861869
def shape(self):
18871870
return (len(self.mgr_locs), self.sp_index.length)
Binary file not shown.

pandas/io/tests/generate_legacy_pickles.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def create_data():
6060
from pandas import (Series,TimeSeries,DataFrame,Panel,
6161
SparseSeries,SparseTimeSeries,SparseDataFrame,SparsePanel,
6262
Index,MultiIndex,PeriodIndex,
63-
date_range,period_range,bdate_range,Timestamp)
63+
date_range,period_range,bdate_range,Timestamp,Categorical)
6464
nan = np.nan
6565

6666
data = {
@@ -85,7 +85,8 @@ def create_data():
8585
mi = Series(np.arange(5).astype(np.float64),index=MultiIndex.from_tuples(tuple(zip(*[[1,1,2,2,2],
8686
[3,4,3,4,5]])),
8787
names=['one','two'])),
88-
dup=Series(np.arange(5).astype(np.float64), index=['A', 'B', 'C', 'D', 'A']))
88+
dup=Series(np.arange(5).astype(np.float64), index=['A', 'B', 'C', 'D', 'A']),
89+
cat=Series(Categorical(['foo', 'bar', 'baz'])))
8990

9091
frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)),
9192
int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
@@ -95,7 +96,11 @@ def create_data():
9596
['one','two','one','two','three']])),
9697
names=['first','second'])),
9798
dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
98-
columns=['A', 'B', 'A']))
99+
columns=['A', 'B', 'A']),
100+
cat_onecol=DataFrame(dict(A=Categorical(['foo', 'bar']))),
101+
cat_and_float=DataFrame(dict(A=Categorical(['foo', 'bar', 'baz']),
102+
B=np.arange(3))),
103+
)
99104
panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)),
100105
dup = Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
101106
items=['A', 'B', 'A']))

pandas/tests/test_internals.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import pandas.util.testing as tm
1212
import pandas as pd
1313
from pandas.util.testing import (
14-
assert_almost_equal, assert_frame_equal, randn)
14+
assert_almost_equal, assert_frame_equal, randn, assert_series_equal)
1515
from pandas.compat import zip, u
1616

1717

@@ -363,6 +363,15 @@ def test_non_unique_pickle(self):
363363
mgr2 = self.round_trip_pickle(mgr)
364364
assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
365365

366+
def test_categorical_block_pickle(self):
367+
mgr = create_mgr('a: category')
368+
mgr2 = self.round_trip_pickle(mgr)
369+
assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
370+
371+
smgr = create_single_mgr('category')
372+
smgr2 = self.round_trip_pickle(smgr)
373+
assert_series_equal(Series(smgr), Series(smgr2))
374+
366375
def test_get_scalar(self):
367376
for item in self.mgr.items:
368377
for i, index in enumerate(self.mgr.axes[1]):

0 commit comments

Comments
 (0)