|
15 | 15 | import datetime
|
16 | 16 | import functools
|
17 | 17 | from functools import partial
|
18 |
| -import glob |
19 | 18 | import gzip
|
20 | 19 | import io
|
21 | 20 | import os
|
|
53 | 52 | )
|
54 | 53 |
|
55 | 54 |
|
56 |
| -@pytest.fixture(scope="module") |
| 55 | +@pytest.fixture |
57 | 56 | def current_pickle_data():
|
58 | 57 | # our current version pickle data
|
59 | 58 | from pandas.tests.io.generate_legacy_storage_files import create_pickle_data
|
@@ -82,15 +81,6 @@ def compare_element(result, expected, typ):
|
82 | 81 | comparator(result, expected)
|
83 | 82 |
|
84 | 83 |
|
85 |
| -legacy_dirname = os.path.join(os.path.dirname(__file__), "data", "legacy_pickle") |
86 |
| -files = glob.glob(os.path.join(legacy_dirname, "*", "*.pickle")) |
87 |
| - |
88 |
| - |
89 |
| -@pytest.fixture(params=files) |
90 |
| -def legacy_pickle(request, datapath): |
91 |
| - return datapath(request.param) |
92 |
| - |
93 |
| - |
94 | 84 | # ---------------------
|
95 | 85 | # tests
|
96 | 86 | # ---------------------
|
@@ -125,50 +115,54 @@ def test_flatten_buffer(data):
|
125 | 115 | assert result.shape == (result.nbytes,)
|
126 | 116 |
|
127 | 117 |
|
128 |
| -def test_pickles(legacy_pickle): |
| 118 | +def test_pickles(datapath): |
129 | 119 | if not is_platform_little_endian():
|
130 | 120 | pytest.skip("known failure on non-little endian")
|
131 | 121 |
|
132 |
| - data = pd.read_pickle(legacy_pickle) |
133 |
| - |
134 |
| - for typ, dv in data.items(): |
135 |
| - for dt, result in dv.items(): |
136 |
| - expected = data[typ][dt] |
137 |
| - |
138 |
| - if typ == "series" and dt == "ts": |
139 |
| - # GH 7748 |
140 |
| - tm.assert_series_equal(result, expected) |
141 |
| - assert result.index.freq == expected.index.freq |
142 |
| - assert not result.index.freq.normalize |
143 |
| - tm.assert_series_equal(result > 0, expected > 0) |
144 |
| - |
145 |
| - # GH 9291 |
146 |
| - freq = result.index.freq |
147 |
| - assert freq + Day(1) == Day(2) |
148 |
| - |
149 |
| - res = freq + pd.Timedelta(hours=1) |
150 |
| - assert isinstance(res, pd.Timedelta) |
151 |
| - assert res == pd.Timedelta(days=1, hours=1) |
152 |
| - |
153 |
| - res = freq + pd.Timedelta(nanoseconds=1) |
154 |
| - assert isinstance(res, pd.Timedelta) |
155 |
| - assert res == pd.Timedelta(days=1, nanoseconds=1) |
156 |
| - elif typ == "index" and dt == "period": |
157 |
| - tm.assert_index_equal(result, expected) |
158 |
| - assert isinstance(result.freq, MonthEnd) |
159 |
| - assert result.freq == MonthEnd() |
160 |
| - assert result.freqstr == "M" |
161 |
| - tm.assert_index_equal(result.shift(2), expected.shift(2)) |
162 |
| - elif typ == "series" and dt in ("dt_tz", "cat"): |
163 |
| - tm.assert_series_equal(result, expected) |
164 |
| - elif typ == "frame" and dt in ( |
165 |
| - "dt_mixed_tzs", |
166 |
| - "cat_onecol", |
167 |
| - "cat_and_float", |
168 |
| - ): |
169 |
| - tm.assert_frame_equal(result, expected) |
170 |
| - else: |
171 |
| - compare_element(result, expected, typ) |
| 122 | + # For loop for compat with --strict-data-files |
| 123 | + for legacy_pickle in Path(__file__).parent.glob("data/legacy_pickle/*/*.p*kl*"): |
| 124 | + legacy_pickle = datapath(legacy_pickle) |
| 125 | + |
| 126 | + data = pd.read_pickle(legacy_pickle) |
| 127 | + |
| 128 | + for typ, dv in data.items(): |
| 129 | + for dt, result in dv.items(): |
| 130 | + expected = data[typ][dt] |
| 131 | + |
| 132 | + if typ == "series" and dt == "ts": |
| 133 | + # GH 7748 |
| 134 | + tm.assert_series_equal(result, expected) |
| 135 | + assert result.index.freq == expected.index.freq |
| 136 | + assert not result.index.freq.normalize |
| 137 | + tm.assert_series_equal(result > 0, expected > 0) |
| 138 | + |
| 139 | + # GH 9291 |
| 140 | + freq = result.index.freq |
| 141 | + assert freq + Day(1) == Day(2) |
| 142 | + |
| 143 | + res = freq + pd.Timedelta(hours=1) |
| 144 | + assert isinstance(res, pd.Timedelta) |
| 145 | + assert res == pd.Timedelta(days=1, hours=1) |
| 146 | + |
| 147 | + res = freq + pd.Timedelta(nanoseconds=1) |
| 148 | + assert isinstance(res, pd.Timedelta) |
| 149 | + assert res == pd.Timedelta(days=1, nanoseconds=1) |
| 150 | + elif typ == "index" and dt == "period": |
| 151 | + tm.assert_index_equal(result, expected) |
| 152 | + assert isinstance(result.freq, MonthEnd) |
| 153 | + assert result.freq == MonthEnd() |
| 154 | + assert result.freqstr == "M" |
| 155 | + tm.assert_index_equal(result.shift(2), expected.shift(2)) |
| 156 | + elif typ == "series" and dt in ("dt_tz", "cat"): |
| 157 | + tm.assert_series_equal(result, expected) |
| 158 | + elif typ == "frame" and dt in ( |
| 159 | + "dt_mixed_tzs", |
| 160 | + "cat_onecol", |
| 161 | + "cat_and_float", |
| 162 | + ): |
| 163 | + tm.assert_frame_equal(result, expected) |
| 164 | + else: |
| 165 | + compare_element(result, expected, typ) |
172 | 166 |
|
173 | 167 |
|
174 | 168 | def python_pickler(obj, path):
|
@@ -580,9 +574,15 @@ def test_pickle_big_dataframe_compression(protocol, compression):
|
580 | 574 | tm.assert_frame_equal(df, result)
|
581 | 575 |
|
582 | 576 |
|
583 |
| -def test_pickle_frame_v124_unpickle_130(): |
| 577 | +def test_pickle_frame_v124_unpickle_130(datapath): |
584 | 578 | # GH#42345 DataFrame created in 1.2.x, unpickle in 1.3.x
|
585 |
| - path = os.path.join(legacy_dirname, "1.2.4", "empty_frame_v1_2_4-GH#42345.pkl") |
| 579 | + path = datapath( |
| 580 | + Path(__file__).parent, |
| 581 | + "data", |
| 582 | + "legacy_pickle", |
| 583 | + "1.2.4", |
| 584 | + "empty_frame_v1_2_4-GH#42345.pkl", |
| 585 | + ) |
586 | 586 | with open(path, "rb") as fd:
|
587 | 587 | df = pickle.load(fd)
|
588 | 588 |
|
|
0 commit comments