From 407be1c6cabea2e4807054f957ee93b10e346344 Mon Sep 17 00:00:00 2001
From: Ka Wo Chen
Date: Mon, 25 Jan 2016 00:20:58 -0500
Subject: [PATCH] DEPR: GH10623 remove items from msgpack.encode for blocks

---
 doc/source/io.rst                                  |  18 +
 doc/source/whatsnew/v0.18.0.txt                    |  28 +-
 pandas/core/common.py                              |  26 ++
 pandas/core/internals.py                           |  12 +
 pandas/io/packers.py                               | 370 +++++++++---------
 .../0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack      | Bin 0 -> 10307 bytes
 .../0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack       | Bin 0 -> 9300 bytes
 .../io/tests/generate_legacy_storage_files.py      | 147 ++++---
 pandas/io/tests/test_packers.py                    |  15 +-
 9 files changed, 370 insertions(+), 246 deletions(-)
 create mode 100644 pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack
 create mode 100644 pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack

diff --git a/doc/source/io.rst b/doc/source/io.rst
index e2f2301beb078..459d79ec4d98c 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -2539,6 +2539,24 @@ both on the writing (serialization), and reading (deserialization).
    optimizations in the io of the ``msgpack`` data. Since this is marked
    as an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release.
 
+   As a result of writing format changes and other issues:
+
+   +----------------------+------------------------+
+   | Packed with          | Can be unpacked with   |
+   +======================+========================+
+   | pre-0.17 / Python 2  | any                    |
+   +----------------------+------------------------+
+   | pre-0.17 / Python 3  | any                    |
+   +----------------------+------------------------+
+   | 0.17 / Python 2      | - 0.17 / Python 2      |
+   |                      | - >=0.18 / any Python  |
+   +----------------------+------------------------+
+   | 0.17 / Python 3      | >=0.18 / any Python    |
+   +----------------------+------------------------+
+   | 0.18                 | >= 0.18                |
+   +----------------------+------------------------+
+
+   Reading files packed by older versions is backward-compatible, except for files packed with 0.17 under Python 2, which can only be unpacked under Python 2.
+
 .. ipython:: python
 
    df = DataFrame(np.random.rand(5,2),columns=list('AB'))
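(For orientation, not a hunk of this patch: the round trip that the table above constrains looks like the following, written against the 0.18-era API. ``to_msgpack``/``read_msgpack`` were later removed from pandas, and ``compress='zlib'`` assumes zlib is importable.)

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB'))

# pack to disk; 'zlib' or 'blosc' compresses the serialized value blocks
df.to_msgpack('frame.msg', compress='zlib')

# unpack; per the table above, a file packed with 0.18 is readable by >= 0.18 only
result = pd.read_msgpack('frame.msg')
```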
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 8429739902927..47e78cf558a16 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -513,6 +513,33 @@ Subtraction by ``Timedelta`` in a ``Series`` by a ``Timestamp`` works (:issue:`1
   ``pd.Timestamp`` to rehydrate any timestamp like object from
   its isoformat (:issue:`12300`).
 
+Changes to msgpack
+^^^^^^^^^^^^^^^^^^
+
+Forward-incompatible changes to the ``msgpack`` writing format were made in 0.17.0 and 0.18.0; older versions of pandas cannot read files packed by newer versions (:issue:`12129`, :issue:`10527`)
+
+A bug in ``to_msgpack`` and ``read_msgpack``, introduced in 0.17.0 and fixed in 0.18.0, made files packed in Python 2 unreadable in Python 3 (:issue:`12142`)
+
+.. warning::
+
+   As a result of a number of issues:
+
+   +----------------------+------------------------+
+   | Packed with          | Can be unpacked with   |
+   +======================+========================+
+   | pre-0.17 / Python 2  | any                    |
+   +----------------------+------------------------+
+   | pre-0.17 / Python 3  | any                    |
+   +----------------------+------------------------+
+   | 0.17 / Python 2      | - 0.17 / Python 2      |
+   |                      | - >=0.18 / any Python  |
+   +----------------------+------------------------+
+   | 0.17 / Python 3      | >=0.18 / any Python    |
+   +----------------------+------------------------+
+   | 0.18                 | >= 0.18                |
+   +----------------------+------------------------+
+
+   0.18.0 is backward-compatible for reading files packed by older versions, except for files packed with 0.17 in Python 2, which can only be unpacked in Python 2.
 
 Signature change for .rank
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -806,7 +833,6 @@ assignments are valid for multi-line expressions.
 
 Other API Changes
 ^^^^^^^^^^^^^^^^^
-
 - ``DataFrame.between_time`` and ``Series.between_time`` now only parse a fixed set of time strings. Parsing of date strings is no longer supported and raises a ``ValueError``. (:issue:`11818`)
 
 .. ipython:: python
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 70c02c5632d80..4f3ec58910950 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -3039,3 +3039,29 @@ def _random_state(state=None):
     else:
         raise ValueError("random_state must be an integer, a numpy "
                          "RandomState, or None")
+
+
+def pandas_dtype(dtype):
+    """
+    Converts input into a pandas only dtype object or a numpy dtype object.
+
+    Parameters
+    ----------
+    dtype : object to be converted
+
+    Returns
+    -------
+    np.dtype or a pandas dtype
+    """
+    if isinstance(dtype, compat.string_types):
+        try:
+            return DatetimeTZDtype.construct_from_string(dtype)
+        except TypeError:
+            pass
+
+    try:
+        return CategoricalDtype.construct_from_string(dtype)
+    except TypeError:
+        pass
+
+    return np.dtype(dtype)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 8973ea025e611..c6b04757e201c 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -2098,6 +2098,14 @@ def __init__(self, values, placement, ndim=2, **kwargs):
 
         if not isinstance(values, self._holder):
             values = self._holder(values)
+
+        dtype = kwargs.pop('dtype', None)
+
+        if dtype is not None:
+            if isinstance(dtype, compat.string_types):
+                dtype = DatetimeTZDtype.construct_from_string(dtype)
+            values = values.tz_localize('UTC').tz_convert(dtype.tz)
+
         if values.tz is None:
             raise ValueError("cannot create a DatetimeTZBlock without a tz")
 
@@ -2428,6 +2436,10 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None,
         else:
             klass = ObjectBlock
 
+    elif klass is DatetimeTZBlock and not is_datetimetz(values):
+        return klass(values, ndim=ndim, fastpath=fastpath,
+                     placement=placement, dtype=dtype)
+
     return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
 
 # TODO: flexible with index=None and/or items=None
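(For orientation, not a hunk of this patch: ``pandas_dtype`` is what lets the msgpack code below map a stored dtype *name* back to a concrete dtype object. A sketch of the intended behavior, using the 0.18-era ``pandas.core.common`` location of the function added above:)

```python
import numpy as np
from pandas.core.common import pandas_dtype

# pandas-only dtypes are tried first
pandas_dtype('datetime64[ns, US/Eastern]')  # DatetimeTZDtype
pandas_dtype('category')                    # CategoricalDtype

# anything else falls through to numpy
pandas_dtype('int64')                       # dtype('int64')

# .base strips the tz, giving the numpy dtype the raw i8 values are stored
# under -- which is what unconvert() needs for np.frombuffer
assert pandas_dtype('datetime64[ns, US/Eastern]').base == np.dtype('M8[ns]')
```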
diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index 372c8d80e5a1a..701b78d2771fb 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -44,7 +44,7 @@
 import numpy as np
 from pandas import compat
-from pandas.compat import u
+from pandas.compat import u, u_safe
 from pandas import (Timestamp, Period, Series, DataFrame,  # noqa
                     Index, MultiIndex, Float64Index, Int64Index,
                     Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT)
@@ -52,7 +52,7 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame,
     SparsePanel
 from pandas.sparse.array import BlockIndex, IntIndex
 from pandas.core.generic import NDFrame
-from pandas.core.common import needs_i8_conversion
+from pandas.core.common import needs_i8_conversion, pandas_dtype
 from pandas.io.common import get_filepath_or_buffer
 from pandas.core.internals import BlockManager, make_block
 import pandas.core.internals as internals
@@ -84,6 +84,8 @@ def to_msgpack(path_or_buf, *args, **kwargs):
     """
     global compressor
     compressor = kwargs.pop('compress', None)
+    if compressor:
+        compressor = u(compressor)
     append = kwargs.pop('append', None)
     if append:
         mode = 'a+b'
@@ -180,7 +182,7 @@ def dtype_for(t):
     """ return my dtype mapping, whether number or name """
     if t in dtype_dict:
         return dtype_dict[t]
-    return np.typeDict[t]
+    return np.typeDict.get(t, t)
 
 c2f_dict = {'complex': np.float64,
             'complex128': np.float64,
@@ -248,15 +250,17 @@ def unconvert(values, dtype, compress=None):
     if dtype == np.object_:
         return np.array(values, dtype=object)
 
+    dtype = pandas_dtype(dtype).base
+
     if not as_is_ext:
         values = values.encode('latin1')
 
-    if compress == 'zlib':
+    if compress == u'zlib':
         import zlib
         values = zlib.decompress(values)
         return np.frombuffer(values, dtype=dtype)
 
-    elif compress == 'blosc':
+    elif compress == u'blosc':
         import blosc
         values = blosc.decompress(values)
         return np.frombuffer(values, dtype=dtype)
@@ -269,53 +273,52 @@ def encode(obj):
     """
     Data encoder
     """
     tobj = type(obj)
     if isinstance(obj, Index):
         if isinstance(obj, RangeIndex):
-            return {'typ': 'range_index',
-                    'klass': obj.__class__.__name__,
-                    'name': getattr(obj, 'name', None),
-                    'start': getattr(obj, '_start', None),
-                    'stop': getattr(obj, '_stop', None),
-                    'step': getattr(obj, '_step', None)}
+            return {u'typ': u'range_index',
+                    u'klass': u(obj.__class__.__name__),
+                    u'name': getattr(obj, 'name', None),
+                    u'start': getattr(obj, '_start', None),
+                    u'stop': getattr(obj, '_stop', None),
+                    u'step': getattr(obj, '_step', None)}
         elif isinstance(obj, PeriodIndex):
-            return {'typ': 'period_index',
-                    'klass': obj.__class__.__name__,
-                    'name': getattr(obj, 'name', None),
-                    'freq': getattr(obj, 'freqstr', None),
-                    'dtype': obj.dtype.name,
-                    'data': convert(obj.asi8),
-                    'compress': compressor}
+            return {u'typ': u'period_index',
+                    u'klass': u(obj.__class__.__name__),
+                    u'name': getattr(obj, 'name', None),
+                    u'freq': u_safe(getattr(obj, 'freqstr', None)),
+                    u'dtype': u(obj.dtype.name),
+                    u'data': convert(obj.asi8),
+                    u'compress': compressor}
         elif isinstance(obj, DatetimeIndex):
             tz = getattr(obj, 'tz', None)
 
             # store tz info and data as UTC
             if tz is not None:
-                tz = tz.zone
+                tz = u(tz.zone)
                 obj = obj.tz_convert('UTC')
-            return {'typ': 'datetime_index',
-                    'klass': obj.__class__.__name__,
-                    'name': getattr(obj, 'name', None),
-                    'dtype': obj.dtype.name,
-                    'data': convert(obj.asi8),
-                    'freq': getattr(obj, 'freqstr', None),
-                    'tz': tz,
-                    'compress': compressor}
+            return {u'typ': u'datetime_index',
+                    u'klass': u(obj.__class__.__name__),
+                    u'name': getattr(obj, 'name', None),
+                    u'dtype': u(obj.dtype.name),
+                    u'data': convert(obj.asi8),
+                    u'freq': u_safe(getattr(obj, 'freqstr', None)),
+                    u'tz': tz,
+                    u'compress': compressor}
         elif isinstance(obj, MultiIndex):
-            return {'typ': 'multi_index',
-                    'klass': obj.__class__.__name__,
-                    'names': getattr(obj, 'names', None),
-                    'dtype': obj.dtype.name,
-                    'data': convert(obj.values),
-                    'compress': compressor}
+            return {u'typ': u'multi_index',
+                    u'klass': u(obj.__class__.__name__),
+                    u'names': getattr(obj, 'names', None),
+                    u'dtype': u(obj.dtype.name),
+                    u'data': 
convert(obj.values), + u'compress': compressor} else: - return {'typ': 'index', - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'dtype': obj.dtype.name, - 'data': convert(obj.values), - 'compress': compressor} + return {u'typ': u'index', + u'klass': u(obj.__class__.__name__), + u'name': getattr(obj, 'name', None), + u'dtype': u(obj.dtype.name), + u'data': convert(obj.values), + u'compress': compressor} elif isinstance(obj, Series): if isinstance(obj, SparseSeries): raise NotImplementedError( @@ -332,13 +335,13 @@ def encode(obj): # d[f] = getattr(obj, f, None) # return d else: - return {'typ': 'series', - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'index': obj.index, - 'dtype': obj.dtype.name, - 'data': convert(obj.values), - 'compress': compressor} + return {u'typ': u'series', + u'klass': u(obj.__class__.__name__), + u'name': getattr(obj, 'name', None), + u'index': obj.index, + u'dtype': u(obj.dtype.name), + u'data': convert(obj.values), + u'compress': compressor} elif issubclass(tobj, NDFrame): if isinstance(obj, SparseDataFrame): raise NotImplementedError( @@ -371,86 +374,85 @@ def encode(obj): data = data.consolidate() # the block manager - return {'typ': 'block_manager', - 'klass': obj.__class__.__name__, - 'axes': data.axes, - 'blocks': [{'items': data.items.take(b.mgr_locs), - 'locs': b.mgr_locs.as_array, - 'values': convert(b.values), - 'shape': b.values.shape, - 'dtype': b.dtype.name, - 'klass': b.__class__.__name__, - 'compress': compressor - } for b in data.blocks]} + return {u'typ': u'block_manager', + u'klass': u(obj.__class__.__name__), + u'axes': data.axes, + u'blocks': [{u'locs': b.mgr_locs.as_array, + u'values': convert(b.values), + u'shape': b.values.shape, + u'dtype': u(b.dtype.name), + u'klass': u(b.__class__.__name__), + u'compress': compressor} for b in data.blocks] + } elif isinstance(obj, (datetime, date, np.datetime64, timedelta, np.timedelta64, NaTType)): if isinstance(obj, Timestamp): tz = obj.tzinfo if tz is not None: - tz = tz.zone + tz = u(tz.zone) offset = obj.offset if offset is not None: - offset = offset.freqstr - return {'typ': 'timestamp', - 'value': obj.value, - 'offset': offset, - 'tz': tz} + offset = u(offset.freqstr) + return {u'typ': u'timestamp', + u'value': obj.value, + u'offset': offset, + u'tz': tz} if isinstance(obj, NaTType): - return {'typ': 'nat'} + return {u'typ': u'nat'} elif isinstance(obj, np.timedelta64): - return {'typ': 'timedelta64', - 'data': obj.view('i8')} + return {u'typ': u'timedelta64', + u'data': obj.view('i8')} elif isinstance(obj, timedelta): - return {'typ': 'timedelta', - 'data': (obj.days, obj.seconds, obj.microseconds)} + return {u'typ': u'timedelta', + u'data': (obj.days, obj.seconds, obj.microseconds)} elif isinstance(obj, np.datetime64): - return {'typ': 'datetime64', - 'data': str(obj)} + return {u'typ': u'datetime64', + u'data': u(str(obj))} elif isinstance(obj, datetime): - return {'typ': 'datetime', - 'data': obj.isoformat()} + return {u'typ': u'datetime', + u'data': u(obj.isoformat())} elif isinstance(obj, date): - return {'typ': 'date', - 'data': obj.isoformat()} + return {u'typ': u'date', + u'data': u(obj.isoformat())} raise Exception("cannot encode this datetimelike object: %s" % obj) elif isinstance(obj, Period): - return {'typ': 'period', - 'ordinal': obj.ordinal, - 'freq': obj.freq} + return {u'typ': u'period', + u'ordinal': obj.ordinal, + u'freq': u(obj.freq)} elif isinstance(obj, BlockIndex): - return {'typ': 'block_index', - 'klass': 
obj.__class__.__name__, - 'blocs': obj.blocs, - 'blengths': obj.blengths, - 'length': obj.length} + return {u'typ': u'block_index', + u'klass': u(obj.__class__.__name__), + u'blocs': obj.blocs, + u'blengths': obj.blengths, + u'length': obj.length} elif isinstance(obj, IntIndex): - return {'typ': 'int_index', - 'klass': obj.__class__.__name__, - 'indices': obj.indices, - 'length': obj.length} + return {u'typ': u'int_index', + u'klass': u(obj.__class__.__name__), + u'indices': obj.indices, + u'length': obj.length} elif isinstance(obj, np.ndarray): - return {'typ': 'ndarray', - 'shape': obj.shape, - 'ndim': obj.ndim, - 'dtype': obj.dtype.name, - 'data': convert(obj), - 'compress': compressor} + return {u'typ': u'ndarray', + u'shape': obj.shape, + u'ndim': obj.ndim, + u'dtype': u(obj.dtype.name), + u'data': convert(obj), + u'compress': compressor} elif isinstance(obj, np.number): if np.iscomplexobj(obj): - return {'typ': 'np_scalar', - 'sub_typ': 'np_complex', - 'dtype': obj.dtype.name, - 'real': obj.real.__repr__(), - 'imag': obj.imag.__repr__()} + return {u'typ': u'np_scalar', + u'sub_typ': u'np_complex', + u'dtype': u(obj.dtype.name), + u'real': u(obj.real.__repr__()), + u'imag': u(obj.imag.__repr__())} else: - return {'typ': 'np_scalar', - 'dtype': obj.dtype.name, - 'data': obj.__repr__()} + return {u'typ': u'np_scalar', + u'dtype': u(obj.dtype.name), + u'data': u(obj.__repr__())} elif isinstance(obj, complex): - return {'typ': 'np_complex', - 'real': obj.real.__repr__(), - 'imag': obj.imag.__repr__()} + return {u'typ': u'np_complex', + u'real': u(obj.real.__repr__()), + u'imag': u(obj.imag.__repr__())} return obj @@ -460,83 +462,91 @@ def decode(obj): Decoder for deserializing numpy data types. """ - typ = obj.get('typ') + typ = obj.get(u'typ') if typ is None: return obj - elif typ == 'timestamp': - return Timestamp(obj['value'], tz=obj['tz'], offset=obj['offset']) - elif typ == 'nat': + elif typ == u'timestamp': + return Timestamp(obj[u'value'], tz=obj[u'tz'], offset=obj[u'offset']) + elif typ == u'nat': return NaT - elif typ == 'period': - return Period(ordinal=obj['ordinal'], freq=obj['freq']) - elif typ == 'index': - dtype = dtype_for(obj['dtype']) - data = unconvert(obj['data'], dtype, - obj.get('compress')) - return globals()[obj['klass']](data, dtype=dtype, name=obj['name']) - elif typ == 'range_index': - return globals()[obj['klass']](obj['start'], - obj['stop'], - obj['step'], - name=obj['name']) - elif typ == 'multi_index': - dtype = dtype_for(obj['dtype']) - data = unconvert(obj['data'], dtype, - obj.get('compress')) + elif typ == u'period': + return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq']) + elif typ == u'index': + dtype = dtype_for(obj[u'dtype']) + data = unconvert(obj[u'data'], dtype, + obj.get(u'compress')) + return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name']) + elif typ == u'range_index': + return globals()[obj[u'klass']](obj[u'start'], + obj[u'stop'], + obj[u'step'], + name=obj[u'name']) + elif typ == u'multi_index': + dtype = dtype_for(obj[u'dtype']) + data = unconvert(obj[u'data'], dtype, + obj.get(u'compress')) data = [tuple(x) for x in data] - return globals()[obj['klass']].from_tuples(data, names=obj['names']) - elif typ == 'period_index': - data = unconvert(obj['data'], np.int64, obj.get('compress')) - d = dict(name=obj['name'], freq=obj['freq']) - return globals()[obj['klass']](data, **d) - elif typ == 'datetime_index': - data = unconvert(obj['data'], np.int64, obj.get('compress')) - d = dict(name=obj['name'], freq=obj['freq'], 
verify_integrity=False) - result = globals()[obj['klass']](data, **d) - tz = obj['tz'] + return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names']) + elif typ == u'period_index': + data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) + d = dict(name=obj[u'name'], freq=obj[u'freq']) + return globals()[obj[u'klass']](data, **d) + elif typ == u'datetime_index': + data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) + d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False) + result = globals()[obj[u'klass']](data, **d) + tz = obj[u'tz'] # reverse tz conversion if tz is not None: result = result.tz_localize('UTC').tz_convert(tz) return result - elif typ == 'series': - dtype = dtype_for(obj['dtype']) - index = obj['index'] - return globals()[obj['klass']](unconvert(obj['data'], dtype, - obj['compress']), - index=index, - dtype=dtype, - name=obj['name']) - elif typ == 'block_manager': - axes = obj['axes'] + elif typ == u'series': + dtype = dtype_for(obj[u'dtype']) + pd_dtype = pandas_dtype(dtype) + np_dtype = pandas_dtype(dtype).base + index = obj[u'index'] + result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype, + obj[u'compress']), + index=index, + dtype=np_dtype, + name=obj[u'name']) + tz = getattr(pd_dtype, 'tz', None) + if tz: + result = result.dt.tz_localize('UTC').dt.tz_convert(tz) + return result + + elif typ == u'block_manager': + axes = obj[u'axes'] def create_block(b): - values = unconvert(b['values'], dtype_for(b['dtype']), - b['compress']).reshape(b['shape']) + values = unconvert(b[u'values'], dtype_for(b[u'dtype']), + b[u'compress']).reshape(b[u'shape']) # locs handles duplicate column names, and should be used instead # of items; see GH 9618 - if 'locs' in b: - placement = b['locs'] + if u'locs' in b: + placement = b[u'locs'] else: - placement = axes[0].get_indexer(b['items']) + placement = axes[0].get_indexer(b[u'items']) return make_block(values=values, - klass=getattr(internals, b['klass']), - placement=placement) - - blocks = [create_block(b) for b in obj['blocks']] - return globals()[obj['klass']](BlockManager(blocks, axes)) - elif typ == 'datetime': - return parse(obj['data']) - elif typ == 'datetime64': - return np.datetime64(parse(obj['data'])) - elif typ == 'date': - return parse(obj['data']).date() - elif typ == 'timedelta': - return timedelta(*obj['data']) - elif typ == 'timedelta64': - return np.timedelta64(int(obj['data'])) + klass=getattr(internals, b[u'klass']), + placement=placement, + dtype=b[u'dtype']) + + blocks = [create_block(b) for b in obj[u'blocks']] + return globals()[obj[u'klass']](BlockManager(blocks, axes)) + elif typ == u'datetime': + return parse(obj[u'data']) + elif typ == u'datetime64': + return np.datetime64(parse(obj[u'data'])) + elif typ == u'date': + return parse(obj[u'data']).date() + elif typ == u'timedelta': + return timedelta(*obj[u'data']) + elif typ == u'timedelta64': + return np.timedelta64(int(obj[u'data'])) # elif typ == 'sparse_series': # dtype = dtype_for(obj['dtype']) # return globals()[obj['klass']]( @@ -554,25 +564,25 @@ def create_block(b): # obj['data'], items=obj['items'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind']) - elif typ == 'block_index': - return globals()[obj['klass']](obj['length'], obj['blocs'], - obj['blengths']) - elif typ == 'int_index': - return globals()[obj['klass']](obj['length'], obj['indices']) - elif typ == 'ndarray': - return unconvert(obj['data'], np.typeDict[obj['dtype']], - 
obj.get('compress')).reshape(obj['shape']) - elif typ == 'np_scalar': - if obj.get('sub_typ') == 'np_complex': - return c2f(obj['real'], obj['imag'], obj['dtype']) + elif typ == u'block_index': + return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'], + obj[u'blengths']) + elif typ == u'int_index': + return globals()[obj[u'klass']](obj[u'length'], obj[u'indices']) + elif typ == u'ndarray': + return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']], + obj.get(u'compress')).reshape(obj[u'shape']) + elif typ == u'np_scalar': + if obj.get(u'sub_typ') == u'np_complex': + return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype']) else: - dtype = dtype_for(obj['dtype']) + dtype = dtype_for(obj[u'dtype']) try: - return dtype(obj['data']) + return dtype(obj[u'data']) except: - return dtype.type(obj['data']) - elif typ == 'np_complex': - return complex(obj['real'] + '+' + obj['imag'] + 'j') + return dtype.type(obj[u'data']) + elif typ == u'np_complex': + return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j') elif isinstance(obj, (dict, list, set)): return obj else: diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack b/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..e89b5dd99150e04a0702cfb6f68b8f5f8b243c27 GIT binary patch literal 10307 zcmeHNOK%%h7#%;xc{Qks5~&LoC_+%75YaRZP+n;oRf1 zXYTzT=X*?6n4Zce&4nv0o-`cuW7a-vWpa7bwzpW+S=?g!g|uPYtb2mrby68~Qv5a3 zlg_1_$sQ4N#OUbAJ6Zc(7N5(T7g=P2#rW;w-D&v>`37TqO4GEPckd)__i9@IqV<-* zWLNx4fnR-b{V##hp{Gy%#PRiSzWq$#=G5Ci3;grf-v@U&zHBdjDDcyG-}k${ES@zo z<`#>kvd#)FXoY-t&ge+#uEajoD)>V9FQPz;62=JQggRk@u#K>tu!9hi>YNmBRAQ21 zPy%L&oS9Eq$<@FMjMIvA!USO(VLM?5VJBf1;UU6qLM*UrN-jjLkTiByY3z_%Bk8Nn zo;A`&-oDH_gpO^;$mEulRmhDMN16VLu_7+vd`iA>6~eMYWZCuh5pp6t)u{9%H(kS==Sbw}#X0mm!&^=5;z(3_i{jN}nOO zdw+@BJhx!w#|N4rLy4`-4Fr3uAI#oQg@Ji9mYmN)x?X1RbsH8r#UkS@a#}Je8<*6FIIKO~Vc`9N7`9f!)5Tehe^0p&_;;fZTa$?W((6S=K^)gG$ zq^;Qt_L@+mV`glxIs5IcK6yn9#fr@{!SY z=C%UB(OlOR&an8(R&mFuVFwO{Lp=98JwH9S1e#y}NrRw(&kE#1Si-2FA5m{H6 zy%fXE|HEQ9O}ge%sFz&Us>l0~%(AQ0#E)QF4x(^|S zaXhv{+7ns~AiFSPz+4ceUt0g45oXQmtG*aKV=OY=IiEK<>5)Ri1leG;D)M62OX?26 z>&TF+<;Ee6Wc!HgEMASnYGRe0CsuTx1@{^11l8q{F?Bsac$5&%1wwt4#vdnqicll$ zBYceT3Bti3&dBNu-9d!ive8=>EUuwl3&BH8FtKLRaiCBfLJTcS#~S_Mthfe=`Ot9) z5lrBbjf_Z$BH)%_f%%Wkzd!Hm(X^ApSAP-s$#(-o{T%n+{Q4t-PyY7mPJa*9>-M+H z@-k077(yVY@q$JQ1mQKXKsI=1-m;F#L-@c$UgSMWUmqy@Bv3~yWY=w)2c`kp8E64s%J7w^-Tt<<}wCW6mb&MJKZ zeM&eDtB|OK=%?wEyt&^5Iu2UItw&1Sr1*tVlRvl#5bgqm+W_G{Xv{XL+H0Q7P^BSs zRW5gYWQNJWs1l-k#5mCJKJpcx(opqB0=sR2L0mN~Eubqk?h}Xig?_|S`-@g>XI1|+ z;dWZ;Zt0avu?bmqC5Ec@b!|Ob;F;=!XBlah$hQ-$=P@nRZ1*m%O5@6G%DnAw?Kc6Vk1G+__( z&HsJB_rCXizxUou7geGpi8_;yk7Z_zxOO>1785C1)OvgJ^Z+j38kMDtLR59tNT%at ze45``lt5Z9OKvo*c{`pSkP1`GyVIWOZ*8h@z=B{L`zDfx98oo`d%TSkW0A%7QptE zVKYK!B)13PK`WxQAzG?C_wO~{Z54#}3Hfb?@gofdhTnX7<6nm1u4npwrTE5o-+#_< ze(0Uw82K2zUsv3Ggr= z)S{I1)AB3QSV~KlueIfK?3z_&b9yMEhZ^*7L=V$#)L=|d^}L)WdF?|v2uj2?2xhxM zc~e(sv&EtEWL_R*9J7M4(Y0EGRBThm_}VB0uQ2;&-OaUEp#jfKsMMLaCLuH_IZomb znoU`DmY_X|Y;GOH3QZXwh^aC1OUu;FpMJh32#jP)OkPw?lrlv*kx+@Y%6fG^AXC{YdVWMym@@DkDi3UD0?$k^ z!O&T?@-uA5JacJmoS$JkOjNkW7>T5!YP5Ps<#gQ0h-s3#YM8xgip!2M#Llr6ygibV zM=uUbVp=>;l%jd0W*wc*GKiwd0d;NOQ5{HlL?H$@0HT{AHpBU?Nq!$&75`vd`T1b2=C33-MJJpBK|{QBlOpR&^E9@uUXJ5unX{6z!QMofF}Wa08at-0zL=$Jm3p} zrvYCCd%3ZS(bU+Vq4=*9EUCLv0QLZI{?u=&@DP*`~tcG_BPB&&`r=i&@HexqWc_!_n^CUK8&iaN1`V2bPw6U zo`^b4+-oOGbqQueTl){mfQ^mhX%4V)vpBir!LBRGsve3~QB^Ba)F4z#=fq54YKgg` 
zr#P{crAkTT^vqY?k{&nP>lyXU)i&y*<3K~Zpb%1SaOcV6ci5(kg>Ih~tcW%3-}C={(bgger@OEH&hXP8I=b2^Zoc*H#|)qP z^R@N%79Q~ST(HlSCpX|D2SjOcbz+DbG6z?*Hw$#80z5wzNfW#?X0|R zE(-Xo8=bBccprIf3MjxhBM!9$^9pq4*|XHS)-iS~BP^M$szK7KPgq%_fohVr^L0$` zf|Nd+qj>N*_dqi^0@zBnYfW#Ck84lP4%|###UZcd+fMg#9v`@wpBdrop19c@bFQ*( z7N8krb7y=QHCmC&s6AWIb#r2pE~WEQ?Uo9$^{U!62w5` z>(1k#`mask53FZV9TiieqF%9GyJoabNrJQYZCq|e!2Xo7%M~Ov$oF{hJ*&_LQ%j*z(asd TfQJDe0XzcuDBxp&T#x<-G>Dtb literal 0 HcmV?d00001 diff --git a/pandas/io/tests/generate_legacy_storage_files.py b/pandas/io/tests/generate_legacy_storage_files.py index f556c980bb80c..bfa8ff6d30a9c 100644 --- a/pandas/io/tests/generate_legacy_storage_files.py +++ b/pandas/io/tests/generate_legacy_storage_files.py @@ -6,6 +6,7 @@ Index, MultiIndex, bdate_range, to_msgpack, date_range, period_range, Timestamp, Categorical, Period) +from pandas.compat import u import os import sys import numpy as np @@ -13,6 +14,9 @@ import platform as pl +_loose_version = LooseVersion(pandas.__version__) + + def _create_sp_series(): nan = np.nan @@ -22,7 +26,7 @@ def _create_sp_series(): arr[-1:] = nan bseries = SparseSeries(arr, kind='block') - bseries.name = 'bseries' + bseries.name = u'bseries' return bseries @@ -36,17 +40,17 @@ def _create_sp_tsseries(): date_index = bdate_range('1/1/2011', periods=len(arr)) bseries = SparseSeries(arr, index=date_index, kind='block') - bseries.name = 'btsseries' + bseries.name = u'btsseries' return bseries def _create_sp_frame(): nan = np.nan - data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C': np.arange(10).astype(np.int64), - 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + data = {u'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + u'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + u'C': np.arange(10).astype(np.int64), + u'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} dates = bdate_range('1/1/2011', periods=10) return SparseDataFrame(data, index=dates) @@ -56,79 +60,79 @@ def create_data(): """ create the pickle/msgpack data """ data = { - 'A': [0., 1., 2., 3., np.nan], - 'B': [0, 1, 0, 1, 0], - 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], - 'D': date_range('1/1/2009', periods=5), - 'E': [0., 1, Timestamp('20100101'), 'foo', 2.] + u'A': [0., 1., 2., 3., np.nan], + u'B': [0, 1, 0, 1, 0], + u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], + u'D': date_range('1/1/2009', periods=5), + u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] 
} - scalars = dict(timestamp=Timestamp('20130101')) - if LooseVersion(pandas.__version__) >= '0.17.0': - scalars['period'] = Period('2012', 'M') + scalars = dict(timestamp=Timestamp('20130101'), + period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10)) mi = dict(reg2=MultiIndex.from_tuples( - tuple(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', - 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', - 'two', 'one', 'two']])), - names=['first', 'second'])) - series = dict(float=Series(data['A']), - int=Series(data['B']), - mixed=Series(data['E']), + tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', + u'foo', u'qux', u'qux'], + [u'one', u'two', u'one', u'two', u'one', + u'two', u'one', u'two']])), + names=[u'first', u'second'])) + series = dict(float=Series(data[u'A']), + int=Series(data[u'B']), + mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples( tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), - names=['one', 'two'])), + names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), - index=['A', 'B', 'C', 'D', 'A']), - cat=Series(Categorical(['foo', 'bar', 'baz'])), + index=[u'A', u'B', u'C', u'D', u'A']), + cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, - tz='US/Eastern'))) - if LooseVersion(pandas.__version__) >= '0.17.0': - series['period'] = Series([Period('2000Q1')] * 5) + tz='US/Eastern')), + period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) - mixed_dup_df.columns = list("ABCDA") - frame = dict(float=DataFrame(dict(A=series['float'], - B=series['float'] + 1)), - int=DataFrame(dict(A=series['int'], B=series['int'] + 1)), - mixed=DataFrame(dict([(k, data[k]) - for k in ['A', 'B', 'C', 'D']])), - mi=DataFrame(dict(A=np.arange(5).astype(np.float64), - B=np.arange(5).astype(np.int64)), + mixed_dup_df.columns = list(u"ABCDA") + frame = dict(float=DataFrame({u'A': series[u'float'], + u'B': series[u'float'] + 1}), + int=DataFrame({u'A': series[u'int'], + u'B': series[u'int'] + 1}), + mixed=DataFrame({k: data[k] + for k in [u'A', u'B', u'C', u'D']}), + mi=DataFrame({u'A': np.arange(5).astype(np.float64), + u'B': np.arange(5).astype(np.int64)}, index=MultiIndex.from_tuples( - tuple(zip(*[['bar', 'bar', 'baz', - 'baz', 'baz'], - ['one', 'two', 'one', - 'two', 'three']])), - names=['first', 'second'])), + tuple(zip(*[[u'bar', u'bar', u'baz', + u'baz', u'baz'], + [u'one', u'two', u'one', + u'two', u'three']])), + names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), - columns=['A', 'B', 'A']), - cat_onecol=DataFrame(dict(A=Categorical(['foo', 'bar']))), - cat_and_float=DataFrame(dict( - A=Categorical(['foo', 'bar', 'baz']), - B=np.arange(3).astype(np.int64))), + columns=[u'A', u'B', u'A']), + cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), + cat_and_float=DataFrame({ + u'A': Categorical([u'foo', u'bar', u'baz']), + u'B': np.arange(3).astype(np.int64)}), mixed_dup=mixed_dup_df, - dt_mixed_tzs=DataFrame(dict( - A=Timestamp('20130102', tz='US/Eastern'), - B=Timestamp('20130603', tz='CET')), index=range(5)), + dt_mixed_tzs=DataFrame({ + u'A': Timestamp('20130102', tz='US/Eastern'), + u'B': Timestamp('20130603', tz='CET')}, index=range(5)) ) - mixed_dup_panel = 
Panel(dict(ItemA=frame['float'], ItemB=frame['int']))
-    mixed_dup_panel.items = ['ItemA', 'ItemA']
-    panel = dict(float=Panel(dict(ItemA=frame['float'],
-                                  ItemB=frame['float'] + 1)),
+    mixed_dup_panel = Panel({u'ItemA': frame[u'float'],
+                             u'ItemB': frame[u'int']})
+    mixed_dup_panel.items = [u'ItemA', u'ItemA']
+    panel = dict(float=Panel({u'ItemA': frame[u'float'],
+                              u'ItemB': frame[u'float'] + 1}),
                  dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
-                           items=['A', 'B', 'A']),
+                           items=[u'A', u'B', u'A']),
                  mixed_dup=mixed_dup_panel)
 
     return dict(series=series,
@@ -147,26 +151,38 @@ def create_pickle_data():
 
     # Pre-0.14.1 versions generated non-unpicklable mixed-type frames and
     # panels if their columns/items were non-unique.
-    if LooseVersion(pandas.__version__) < '0.14.1':
+    if _loose_version < '0.14.1':
         del data['frame']['mixed_dup']
         del data['panel']['mixed_dup']
+    if _loose_version < '0.17.0':
+        del data['series']['period']
+        del data['scalars']['period']
     return data
 
 
+def _u(x):
+    return {u(k): _u(x[k]) for k in x} if isinstance(x, dict) else x
+
+
 def create_msgpack_data():
     data = create_data()
-    if LooseVersion(pandas.__version__) < '0.17.0':
+    if _loose_version < '0.17.0':
         del data['frame']['mixed_dup']
         del data['panel']['mixed_dup']
         del data['frame']['dup']
         del data['panel']['dup']
+    if _loose_version < '0.18.0':
+        del data['series']['dt_tz']
+        del data['frame']['dt_mixed_tzs']
     # Not supported
     del data['sp_series']
     del data['sp_frame']
     del data['series']['cat']
+    del data['series']['period']
     del data['frame']['cat_onecol']
     del data['frame']['cat_and_float']
-    return data
+    del data['scalars']['period']
+    return _u(data)
 
 
 def platform_name():
@@ -199,7 +215,7 @@ def write_legacy_pickles(output_dir):
     print("created pickle file: %s" % pth)
 
 
-def write_legacy_msgpack(output_dir):
+def write_legacy_msgpack(output_dir, compress):
 
     version = pandas.__version__
 
@@ -208,9 +224,9 @@ def write_legacy_msgpack(output_dir):
     print("  pandas version: {0}".format(version))
     print("  output dir    : {0}".format(output_dir))
     print("  storage format: msgpack")
     pth = '{0}.msgpack'.format(platform_name())
-    to_msgpack(os.path.join(output_dir, pth), create_msgpack_data())
+    to_msgpack(os.path.join(output_dir, pth), create_msgpack_data(),
+               compress=compress)
 
     print("created msgpack file: %s" % pth)
 
@@ -219,17 +235,22 @@ def write_legacy_file():
     # force our cwd to be the first searched
     sys.path.insert(0, '.')
 
-    if len(sys.argv) != 3:
+    if not (3 <= len(sys.argv) <= 4):
         exit("Specify output directory and storage type: generate_legacy_"
-             "storage_files.py <output_dir> <storage_type>")
+             "storage_files.py <output_dir> <storage_type> "
+             "<msgpack_compress_type>")
 
     output_dir = str(sys.argv[1])
     storage_type = str(sys.argv[2])
+    try:
+        compress_type = str(sys.argv[3])
+    except IndexError:
+        compress_type = None
 
     if storage_type == 'pickle':
         write_legacy_pickles(output_dir=output_dir)
     elif storage_type == 'msgpack':
-        write_legacy_msgpack(output_dir=output_dir)
+        write_legacy_msgpack(output_dir=output_dir, compress=compress_type)
     else:
         exit("storage_type must be one of {'pickle', 'msgpack'}")
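(For orientation, not a hunk of this patch: with the optional third argument above, the script is run as ``python generate_legacy_storage_files.py <output_dir> <storage_type> <msgpack_compress_type>``. Driving the writer directly looks like this -- a sketch that assumes the module is importable from the 0.18-era test layout:)

```python
import tempfile
from pandas.io.tests.generate_legacy_storage_files import write_legacy_msgpack

out = tempfile.mkdtemp()
# compress may be None, 'zlib' or 'blosc'; it is forwarded to to_msgpack
write_legacy_msgpack(output_dir=out, compress='zlib')
```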
diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py
index d1c05069b4172..d0e7d00d79cb0 100644
--- a/pandas/io/tests/test_packers.py
+++ b/pandas/io/tests/test_packers.py
@@ -331,11 +331,16 @@ def setUp(self):
             'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
             'D': date_range('1/1/2009', periods=5),
             'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
+            'F': [Timestamp('20130102', tz='US/Eastern')] * 2 +
+                 [Timestamp('20130603', tz='CET')] * 3,
+            'G': [Timestamp('20130102', tz='US/Eastern')] * 5
         }
 
         self.d['float'] = Series(data['A'])
         self.d['int'] = Series(data['B'])
         self.d['mixed'] = Series(data['E'])
+        self.d['dt_tz_mixed'] = Series(data['F'])
+        self.d['dt_tz'] = Series(data['G'])
 
     def test_basic(self):
 
@@ -357,13 +362,14 @@ def setUp(self):
             'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
             'D': date_range('1/1/2009', periods=5),
             'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
+            'F': [Timestamp('20130102', tz='US/Eastern')] * 5,
+            'G': [Timestamp('20130603', tz='CET')] * 5
         }
 
         self.frame = {
             'float': DataFrame(dict(A=data['A'], B=Series(data['A']) + 1)),
             'int': DataFrame(dict(A=data['B'], B=Series(data['B']) + 1)),
-            'mixed': DataFrame(dict([(k, data[k])
-                                     for k in ['A', 'B', 'C', 'D']]))}
+            'mixed': DataFrame(data)}
 
         self.panel = {
             'float': Panel(dict(ItemA=self.frame['float'],
@@ -713,6 +719,11 @@ def read_msgpacks(self, version):
         pth = tm.get_data_path('legacy_msgpack/{0}'.format(str(version)))
         n = 0
         for f in os.listdir(pth):
+            # GH12142 0.17 files packed in P2 can't be read in P3
+            if (compat.PY3 and
+                    version.startswith('0.17.') and
+                    f.split('.')[-4][-1] == '2'):
+                continue
             vf = os.path.join(pth, f)
             self.compare(vf, version)
             n += 1
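(For orientation, not a hunk of this patch: the Python-2 skip added to ``read_msgpacks`` relies on the legacy filename layout, in which the fourth-from-last dot-separated piece ends with the major version of the Python that packed the file. A worked example of that parsing:)

```python
f = '0.17.1_x86_64_linux_2.7.11.msgpack'

parts = f.split('.')
# ['0', '17', '1_x86_64_linux_2', '7', '11', 'msgpack']

# parts[-4] is '1_x86_64_linux_2'; its last character is the major
# Python version used for packing
assert parts[-4][-1] == '2'  # packed under Python 2 -> skipped on Python 3
```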