Extend legacy pickle compatibility.

- pickle_compat: add TimeSeries / SparseTimeSeries shims; load() takes an open
  file handle plus optional encoding, compat and is_verbose switches.
- read_pickle: try the plain load first, then the compat load; on py3 retry
  both with encoding='latin1'.
- BlockManager / DatetimeIndex __setstate__: coerce 'M8[us]' data written by
  numpy < 1.7 to 'M8[ns]'.
- tests: add 0.12.0 / 0.13.0 fixtures and a shared read_pickles() helper.

diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index 58bbf70c0bea9..bf52fc30a9ea3 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -1,12 +1,13 @@
 """ support pre 0.12 series pickle compatibility """
 
 import sys
-import pickle
 import numpy as np
 import pandas
+import pickle as pkl
 from pandas import compat
-from pandas.core.series import Series
-from pandas.sparse.series import SparseSeries
+from pandas.compat import u, string_types
+from pandas.core.series import Series, TimeSeries
+from pandas.sparse.series import SparseSeries, SparseTimeSeries
 
 def load_reduce(self):
     stack = self.stack
@@ -14,49 +15,89 @@ def load_reduce(self):
     func = stack[-1]
     if type(args[0]) is type:
         n = args[0].__name__
-        if n == 'DeprecatedSeries':
+        if n == u('DeprecatedSeries') or n == u('DeprecatedTimeSeries'):
             stack[-1] = object.__new__(Series)
             return
-        elif n == 'DeprecatedSparseSeries':
+        elif n == u('DeprecatedSparseSeries') or n == u('DeprecatedSparseTimeSeries'):
             stack[-1] = object.__new__(SparseSeries)
             return
 
     try:
         value = func(*args)
     except:
-        print(sys.exc_info())
-        print(func, args)
+
+        # try to reencode the arguments
+        if self.encoding is not None:
+            args = tuple([ arg.encode(self.encoding) if isinstance(arg, string_types) else arg for arg in args ])
+            try:
+                stack[-1] = func(*args)
+                return
+            except:
+                pass
+
+        if self.is_verbose:
+            print(sys.exc_info())
+            print(func, args)
         raise
 
     stack[-1] = value
 
 if compat.PY3:
-    class Unpickler(pickle._Unpickler):
+    class Unpickler(pkl._Unpickler):
         pass
 else:
-    class Unpickler(pickle.Unpickler):
+    class Unpickler(pkl.Unpickler):
         pass
 
-Unpickler.dispatch[pickle.REDUCE[0]] = load_reduce
+Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
+
+def load(fh, encoding=None, compat=False, is_verbose=False):
+    """
+    load a pickle, with a provided encoding
 
-def load(file):
-    # try to load a compatibility pickle
-    # fake the old class hierarchy
-    # if it works, then return the new type objects
+    if compat is True:
+       fake the old class hierarchy
+       if it works, then return the new type objects
+
+    Parameters
+    ----------
+    fh: a filelike object
+    encoding: an optional encoding
+    compat: provide Series compatibility mode, boolean, default False
+    is_verbose: show exception output
+    """
 
     try:
-        pandas.core.series.Series = DeprecatedSeries
-        pandas.sparse.series.SparseSeries = DeprecatedSparseSeries
-        with open(file,'rb') as fh:
-            return Unpickler(fh).load()
+        if compat:
+            pandas.core.series.Series = DeprecatedSeries
+            pandas.core.series.TimeSeries = DeprecatedTimeSeries
+            pandas.sparse.series.SparseSeries = DeprecatedSparseSeries
+            pandas.sparse.series.SparseTimeSeries = DeprecatedSparseTimeSeries
+        fh.seek(0)
+        if encoding is not None:
+            up = Unpickler(fh, encoding=encoding)
+        else:
+            up = Unpickler(fh)
+        up.is_verbose = is_verbose
+
+        return up.load()
     except:
         raise
     finally:
-        pandas.core.series.Series = Series
-        pandas.sparse.series.SparseSeries = SparseSeries
+        if compat:
+            pandas.core.series.Series = Series
+            pandas.core.series.TimeSeries = TimeSeries
+            pandas.sparse.series.SparseSeries = SparseSeries
+            pandas.sparse.series.SparseTimeSeries = SparseTimeSeries
 
-class DeprecatedSeries(Series, np.ndarray):
+class DeprecatedSeries(np.ndarray, Series):
+    pass
+
+class DeprecatedTimeSeries(DeprecatedSeries):
     pass
 
 class DeprecatedSparseSeries(DeprecatedSeries):
     pass
+
+class DeprecatedSparseTimeSeries(DeprecatedSparseSeries):
+    pass
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 80d5112fa3054..e0ee99455d238 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1845,6 +1845,11 @@ def __setstate__(self, state):
 
         blocks = []
         for values, items in zip(bvalues, bitems):
+
+            # numpy < 1.7 pickle compat
+            if values.dtype == 'M8[us]':
+                values = values.astype('M8[ns]')
+
             blk = make_block(values, items, self.axes[0])
             blocks.append(blk)
         self.blocks = blocks
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index efa8bdb0b123b..af1b333312309 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -1,4 +1,4 @@
-from pandas.compat import cPickle as pkl, PY3
+from pandas.compat import cPickle as pkl, pickle_compat as pc, PY3
 
 def to_pickle(obj, path):
     """
@@ -31,11 +31,22 @@ def read_pickle(path):
     -------
     unpickled : type of object stored in file
     """
+
+    def try_read(path, encoding=None):
+        # try with current pickle, if we have a Type Error then
+        # try with the compat pickle to handle subclass changes
+        # pass encoding only if its not None as py2 doesn't handle
+        # the param
+        try:
+            with open(path,'rb') as fh:
+                return pc.load(fh, encoding=encoding, compat=False)
+        except:
+            with open(path,'rb') as fh:
+                return pc.load(fh, encoding=encoding, compat=True)
+
     try:
-        with open(path, 'rb') as fh:
-            return pkl.load(fh)
+        return try_read(path)
     except:
         if PY3:
-            with open(path, 'rb') as fh:
-                return pkl.load(fh, encoding='latin1')
+            return try_read(path, encoding='latin1')
         raise
diff --git a/pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle b/pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle
new file mode 100644
index 0000000000000..6b471d55b1642
Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle differ
diff --git a/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_i686_linux_2.7.3.pickle b/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_i686_linux_2.7.3.pickle
new file mode 100644
index 0000000000000..17061f6b7dc0f
Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_i686_linux_2.7.3.pickle differ
diff --git a/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle b/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle
new file mode 100644
index 0000000000000..470d3e89c433d
Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle differ
diff --git a/pandas/io/tests/data/legacy_pickle/0.12.0/x86_64_linux_2.7.3.pickle b/pandas/io/tests/data/legacy_pickle/0.12.0/x86_64_linux_2.7.3.pickle
new file mode 100644
index 0000000000000..e8c1e52078f7c
Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.12.0/x86_64_linux_2.7.3.pickle differ
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.12.0-300-g6ffed43_x86_64_linux_2.7.3.pickle b/pandas/io/tests/data/legacy_pickle/0.13.0/0.12.0-300-g6ffed43_x86_64_linux_2.7.3.pickle
new file mode 100644
index 0000000000000..93e1f3e6c9607
Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.13.0/0.12.0-300-g6ffed43_x86_64_linux_2.7.3.pickle differ
diff --git a/pandas/io/tests/generate_legacy_pickles.py b/pandas/io/tests/generate_legacy_pickles.py
index f5d949e2cfc45..f54a67b7f76cf 100644
--- a/pandas/io/tests/generate_legacy_pickles.py
+++ b/pandas/io/tests/generate_legacy_pickles.py
@@ -1,26 +1,47 @@
 """ self-contained to write legacy pickle files """
 from __future__ import print_function
 
-from pandas.compat import zip, cPickle as pickle
+# make sure we are < 0.13 compat (in py3)
+try:
+    from pandas.compat import zip, cPickle as pickle
+except:
+    import pickle
 
 def _create_sp_series():
 
     import numpy as np
-    from pandas import bdate_range, SparseSeries
+    from pandas import SparseSeries
 
     nan = np.nan
 
     # nan-based
-    arr = np.arange(15, dtype=float)
+    arr = np.arange(15, dtype=np.float64)
     index = np.arange(15)
     arr[7:12] = nan
     arr[-1:] = nan
 
-    date_index = bdate_range('1/1/2011', periods=len(index))
-    bseries = SparseSeries(arr, index=index, kind='block')
+    bseries = SparseSeries(arr, kind='block')
     bseries.name = 'bseries'
     return bseries
 
+def _create_sp_tsseries():
+
+    import numpy as np
+    from pandas import bdate_range, SparseTimeSeries
+
+    nan = np.nan
+
+    # nan-based
+    arr = np.arange(15, dtype=np.float64)
+    index = np.arange(15)
+    arr[7:12] = nan
+    arr[-1:] = nan
+
+    date_index = bdate_range('1/1/2011', periods=len(index))
+    bseries = SparseTimeSeries(arr, index=date_index, kind='block')
+    bseries.name = 'btsseries'
+    return bseries
+
 def _create_sp_frame():
     import numpy as np
     from pandas import bdate_range, SparseDataFrame
@@ -29,7 +50,7 @@ def _create_sp_frame():
 
     data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
             'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
-            'C': np.arange(10),
+            'C': np.arange(10).astype(np.int64),
             'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
 
     dates = bdate_range('1/1/2011', periods=10)
@@ -40,8 +61,8 @@ def create_data():
 
     import numpy as np
     import pandas
-    from pandas import (Series,DataFrame,Panel,
-                        SparseSeries,SparseDataFrame,SparsePanel,
+    from pandas import (Series,TimeSeries,DataFrame,Panel,
+                        SparseSeries,SparseTimeSeries,SparseDataFrame,SparsePanel,
                         Index,MultiIndex,PeriodIndex,
                         date_range,bdate_range,Timestamp)
     nan = np.nan
@@ -61,10 +82,11 @@ def create_data():
                                                       names=['first', 'second']))
     series = dict(float = Series(data['A']),
                   int = Series(data['B']),
-                  mixed = Series(data['E']))
+                  mixed = Series(data['E']),
+                  ts = TimeSeries(np.arange(10).astype(np.int64),index=date_range('20130101',periods=10)))
     frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)),
-                  int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
-                  mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
+                 int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
+                 mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
     panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)))
 
 
@@ -74,7 +96,8 @@ def create_data():
                 panel = panel,
                 index = index,
                 mi = mi,
-                sp_series = dict(float = _create_sp_series()),
+                sp_series = dict(float = _create_sp_series(),
+                                 ts = _create_sp_tsseries()),
                 sp_frame = dict(float = _create_sp_frame())
                 )
 
@@ -92,24 +115,11 @@ def write_legacy_pickles():
 
     print("This script generates a pickle file for the current arch, system, and python version")
 
-    base_dir, _ = os.path.split(os.path.abspath(__file__))
-    base_dir = os.path.join(base_dir,'data/legacy_pickle')
-
-    # could make this a parameter?
-    version = None
-
-
-    if version is None:
-        version = pandas.__version__
-    pth = os.path.join(base_dir, str(version))
-    try:
-        os.mkdir(pth)
-    except:
-        pass
+    version = pandas.__version__
 
     # construct a reasonable platform name
-    f = '_'.join([ str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
-    pth = os.path.abspath(os.path.join(pth,'%s.pickle' % f))
+    f = '_'.join([ str(version), str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
+    pth = '{0}.pickle'.format(f)
 
     fh = open(pth,'wb')
     pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL)
diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py
index f2ddce7fa7b7e..92231d2ef094f 100644
--- a/pandas/io/tests/test_pickle.py
+++ b/pandas/io/tests/test_pickle.py
@@ -4,7 +4,7 @@
 
 from datetime import datetime, timedelta
 import operator
-import pickle
+import pickle as pkl
 import unittest
 import nose
 import os
@@ -29,25 +29,11 @@ def compare(self, vf):
 
         # py3 compat when reading py2 pickle
         try:
-            with open(vf,'rb') as fh:
-                data = pickle.load(fh)
-        except ValueError as detail:
-
-            # we are trying to read a py3 pickle in py2.....
+            data = pandas.read_pickle(vf)
+        except (ValueError) as detail:
+            # trying to read a py3 pickle in py2
             return
 
-        # we have a deprecated klass
-        except TypeError as detail:
-
-            from pandas.compat.pickle_compat import load
-            data = load(vf)
-
-        except:
-            if not compat.PY3:
-                raise
-            with open(vf,'rb') as fh:
-                data = pickle.load(fh, encoding='latin1')
-
         for typ, dv in data.items():
             for dt, result in dv.items():
 
@@ -64,23 +50,26 @@ def compare(self, vf):
                     comparator = getattr(tm,"assert_%s_equal" % typ)
                     comparator(result,expected)
 
-    def test_read_pickles_0_10_1(self):
+    def read_pickles(self, version):
         if not is_little_endian():
-            raise nose.SkipTest("known failure of test_read_pickles_0_10_1 on non-little endian")
+            raise nose.SkipTest("known failure on non-little endian")
 
-        pth = tm.get_data_path('legacy_pickle/0.10.1')
+        pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version)))
         for f in os.listdir(pth):
             vf = os.path.join(pth,f)
             self.compare(vf)
 
+    def test_read_pickles_0_10_1(self):
+        self.read_pickles('0.10.1')
+
     def test_read_pickles_0_11_0(self):
-        if not is_little_endian():
-            raise nose.SkipTest("known failure of test_read_pickles_0_11_0 on non-little endian")
+        self.read_pickles('0.11.0')
 
-        pth = tm.get_data_path('legacy_pickle/0.11.0')
-        for f in os.listdir(pth):
-            vf = os.path.join(pth,f)
-            self.compare(vf)
+    def test_read_pickles_0_12_0(self):
+        self.read_pickles('0.12.0')
+
+    def test_read_pickles_0_13_0(self):
+        self.read_pickles('0.13.0')
 
 if __name__ == '__main__':
     import nose
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index ce538b72904da..e91cad62e7dce 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -533,6 +533,12 @@ def __setstate__(self, state):
             self.offset = own_state[1]
             self.tz = own_state[2]
             np.ndarray.__setstate__(self, nd_state)
+
+            # provide numpy < 1.7 compat
+            if nd_state[2] == 'M8[us]':
+                new_state = np.ndarray.__reduce__(self.values.astype('M8[ns]'))
+                np.ndarray.__setstate__(self, new_state[2])
+
         else:  # pragma: no cover
             np.ndarray.__setstate__(self, state)
 
diff --git a/setup.py b/setup.py
index 956b9b13db2ce..f04b39f864ecf 100755
--- a/setup.py
+++ b/setup.py
@@ -526,6 +526,8 @@ def pxd(name):
       package_data={'pandas.io': ['tests/data/legacy_hdf/*.h5',
                                   'tests/data/legacy_pickle/0.10.1/*.pickle',
                                   'tests/data/legacy_pickle/0.11.0/*.pickle',
+                                  'tests/data/legacy_pickle/0.12.0/*.pickle',
+                                  'tests/data/legacy_pickle/0.13.0/*.pickle',
                                   'tests/data/*.csv',
                                   'tests/data/*.dta',
                                   'tests/data/*.txt',