diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index e32bb0f110252..e7070585a4b9c 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3393,15 +3393,15 @@ both on the writing (serialization), and reading (deserialization). .. warning:: - This is a very new feature of pandas. We intend to provide certain - optimizations in the io of the ``msgpack`` data. Since this is marked - as an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. + The msgpack format is deprecated as of 0.25 and will be removed in a future version. + It is recommended to use pyarrow for on-the-wire transmission of pandas objects. .. warning:: :func:`read_msgpack` is only guaranteed backwards compatible back to pandas version 0.20.3 .. ipython:: python + :okwarning: df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB')) df.to_msgpack('foo.msg') @@ -3411,6 +3411,7 @@ both on the writing (serialization), and reading (deserialization). You can pass a list of objects and you will receive them back on deserialization. .. ipython:: python + :okwarning: pd.to_msgpack('foo.msg', df, 'foo', np.array([1, 2, 3]), s) pd.read_msgpack('foo.msg') @@ -3418,6 +3419,7 @@ You can pass a list of objects and you will receive them back on deserialization You can pass ``iterator=True`` to iterate over the unpacked results: .. ipython:: python + :okwarning: for o in pd.read_msgpack('foo.msg', iterator=True): print(o) @@ -3425,6 +3427,7 @@ You can pass ``iterator=True`` to iterate over the unpacked results: You can pass ``append=True`` to the writer to append to an existing pack: .. ipython:: python + :okwarning: df.to_msgpack('foo.msg', append=True) pd.read_msgpack('foo.msg') @@ -3435,6 +3438,7 @@ can pack arbitrary collections of Python lists, dicts, scalars, while intermixin pandas objects. .. 
ipython:: python + :okwarning: pd.to_msgpack('foo2.msg', {'dict': [{'df': df}, {'string': 'foo'}, {'scalar': 1.}, {'s': s}]}) @@ -3453,14 +3457,16 @@ Read/write API Msgpacks can also be read from and written to strings. .. ipython:: python + :okwarning: df.to_msgpack() Furthermore you can concatenate the strings to produce a list of the original objects. .. ipython:: python + :okwarning: - pd.read_msgpack(df.to_msgpack() + s.to_msgpack()) + pd.read_msgpack(df.to_msgpack() + s.to_msgpack()) .. _io.hdf5: diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index 0614de82cbcd0..ab48594ddadab 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -829,6 +829,7 @@ Experimental Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. .. ipython:: python + :okwarning: df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB')) df.to_msgpack('foo.msg') @@ -841,6 +842,7 @@ Experimental You can pass ``iterator=True`` to iterate over the unpacked results .. ipython:: python + :okwarning: for o in pd.read_msgpack('foo.msg', iterator=True): print(o) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 28bf796be404a..0227d89f0d17f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -590,6 +590,12 @@ by a ``Series`` or ``DataFrame`` with sparse values. The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`). +msgpack format +^^^^^^^^^^^^^^ + +The msgpack format is deprecated as of 0.25 and will be removed in a future version. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. 
(:issue:`27084`) + + Other deprecations ^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1af3e9449f3da..3bc7bbb633aed 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2418,8 +2418,11 @@ def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs): """ Serialize object to input file path using msgpack format. - THIS IS AN EXPERIMENTAL LIBRARY and the storage format - may not be stable until a future release. + .. deprecated:: 0.25.0 + + to_msgpack is deprecated and will be removed in a future version. + It is recommended to use pyarrow for on-the-wire transmission of + pandas objects. Parameters ---------- diff --git a/pandas/io/packers.py b/pandas/io/packers.py index cef0af3edbb20..30e51e62aa764 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -78,8 +78,11 @@ def to_msgpack(path_or_buf, *args, **kwargs): """ msgpack (serialize) object to input file path - THIS IS AN EXPERIMENTAL LIBRARY and the storage format - may not be stable until a future release. + .. deprecated:: 0.25.0 + + to_msgpack is deprecated and will be removed in a future version. + It is recommended to use pyarrow for on-the-wire transmission of + pandas objects. Parameters ---------- @@ -92,6 +95,12 @@ def to_msgpack(path_or_buf, *args, **kwargs): compress : type of compressor (zlib or blosc), default to None (no compression) """ + warnings.warn("to_msgpack is deprecated and will be removed in a " + "future version.\n" + "It is recommended to use pyarrow for on-the-wire " + "transmission of pandas objects.", + FutureWarning, stacklevel=3) + global compressor compressor = kwargs.pop('compress', None) append = kwargs.pop('append', None) @@ -121,8 +130,11 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): Load msgpack pandas object from the specified file path - THIS IS AN EXPERIMENTAL LIBRARY and the storage format - may not be stable until a future release. + .. 
deprecated:: 0.25.0 + + read_msgpack is deprecated and will be removed in a future version. + It is recommended to use pyarrow for on-the-wire transmission of + pandas objects. Parameters ---------- @@ -140,6 +152,12 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): read_msgpack is only guaranteed to be backwards compatible to pandas 0.20.3. """ + warnings.warn("The read_msgpack is deprecated and will be removed in a " + "future version.\n" + "It is recommended to use pyarrow for on-the-wire " + "transmission of pandas objects.", + FutureWarning, stacklevel=3) + path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index f580dc460fd68..04faf5aee4b6d 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -45,6 +45,7 @@ def __fspath__(self): # https://github.com/cython/cython/issues/1720 @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestCommonIOCapabilities: data1 = """index,A,B,C,D foo,2,3,4,5 diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 59fa9fbd02da1..203b550b8936a 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -2,7 +2,7 @@ import glob from io import BytesIO import os -from warnings import catch_warnings +from warnings import catch_warnings, filterwarnings import numpy as np import pytest @@ -83,6 +83,7 @@ def check_arbitrary(a, b): assert(a == b) +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestPackers: def setup_method(self, method): @@ -97,6 +98,7 @@ def encode_decode(self, x, compress=None, **kwargs): return read_msgpack(p, **kwargs) +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestAPI(TestPackers): def test_string_io(self): @@ -159,6 +161,7 @@ def 
__init__(self): read_msgpack(path_or_buf=A()) +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestNumpy(TestPackers): def test_numpy_scalar_float(self): @@ -277,6 +280,7 @@ def test_list_mixed(self): tm.assert_almost_equal(tuple(x), x_rec) +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestBasic(TestPackers): def test_timestamp(self): @@ -322,6 +326,7 @@ def test_intervals(self): assert i == i_rec +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestIndex(TestPackers): def setup_method(self, method): @@ -387,6 +392,7 @@ def categorical_index(self): tm.assert_frame_equal(result, df) +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestSeries(TestPackers): def setup_method(self, method): @@ -437,6 +443,7 @@ def test_basic(self): assert_series_equal(i, i_rec) +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestCategorical(TestPackers): def setup_method(self, method): @@ -460,6 +467,7 @@ def test_basic(self): assert_categorical_equal(i, i_rec) +@pytest.mark.filterwarnings("ignore:msgpack:FutureWarning") class TestNDFrame(TestPackers): def setup_method(self, method): @@ -549,6 +557,7 @@ def test_dataframe_duplicate_column_names(self): @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning") @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning") @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning") +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestSparse(TestPackers): def _check_roundtrip(self, obj, comparator, **kwargs): @@ -595,6 +604,7 @@ def test_sparse_frame(self): check_frame_type=True) +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestCompression(TestPackers): """See https://github.com/pandas-dev/pandas/pull/9783 """ @@ -676,18 +686,21 @@ def decompress(ob): with monkeypatch.context() as m, \ tm.assert_produces_warning(PerformanceWarning) as ws: m.setattr(compress_module, 
'decompress', decompress) - i_rec = self.encode_decode(self.frame, compress=compress) - for k in self.frame.keys(): - - value = i_rec[k] - expected = self.frame[k] - assert_frame_equal(value, expected) - # make sure that we can write to the new frames even though - # we needed to copy the data - for block in value._data.blocks: - assert block.values.flags.writeable - # mutate the data in some way - block.values[0] += rhs[block.dtype] + + with catch_warnings(): + filterwarnings('ignore', category=FutureWarning) + i_rec = self.encode_decode(self.frame, compress=compress) + for k in self.frame.keys(): + + value = i_rec[k] + expected = self.frame[k] + assert_frame_equal(value, expected) + # make sure that we can write to the new frames even though + # we needed to copy the data + for block in value._data.blocks: + assert block.values.flags.writeable + # mutate the data in some way + block.values[0] += rhs[block.dtype] for w in ws: # check the messages from our warnings @@ -715,14 +728,18 @@ def test_compression_warns_when_decompress_caches_blosc(self, monkeypatch): def _test_small_strings_no_warn(self, compress): empty = np.array([], dtype='uint8') with tm.assert_produces_warning(None): - empty_unpacked = self.encode_decode(empty, compress=compress) + with catch_warnings(): + filterwarnings('ignore', category=FutureWarning) + empty_unpacked = self.encode_decode(empty, compress=compress) tm.assert_numpy_array_equal(empty_unpacked, empty) assert empty_unpacked.flags.writeable char = np.array([ord(b'a')], dtype='uint8') with tm.assert_produces_warning(None): - char_unpacked = self.encode_decode(char, compress=compress) + with catch_warnings(): + filterwarnings('ignore', category=FutureWarning) + char_unpacked = self.encode_decode(char, compress=compress) tm.assert_numpy_array_equal(char_unpacked, char) assert char_unpacked.flags.writeable @@ -794,6 +811,7 @@ def test_readonly_axis_zlib_to_sql(self): assert_frame_equal(expected, result) 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestEncoding(TestPackers): def setup_method(self, method): @@ -839,6 +857,7 @@ def legacy_packer(request, datapath): @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning") +@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestMsgpack: """ How to add msgpack tests: