@@ -164,35 +164,35 @@ files. Each file in the directory represents a different year of the entire data
164164.. ipython:: python
165165 :okwarning:
166166
167- import pathlib
167+ import glob
168+ import tempfile
168169
169170 N = 12
170171 starts = [f " 20 { i:>02d } -01-01 " for i in range (N)]
171172 ends = [f " 20 { i:>02d } -12-13 " for i in range (N)]
172173
173- pathlib.Path( " data/timeseries " ).mkdir( exist_ok = True )
174+ tmpdir = tempfile.TemporaryDirectory( ignore_cleanup_errors = True )
174175
175176 for i, (start, end) in enumerate (zip (starts, ends)):
176177 ts = make_timeseries(start = start, end = end, freq = " 1min" , seed = i)
177- ts.to_parquet(f " data/timeseries /ts-{ i:0>2d } .parquet " )
178+ ts.to_parquet(f " { tmpdir.name } /ts- { i:0>2d } .parquet " )
178179
179180
180181 ::
181182
182- data
183- └── timeseries
184- ├── ts-00.parquet
185- ├── ts-01.parquet
186- ├── ts-02.parquet
187- ├── ts-03.parquet
188- ├── ts-04.parquet
189- ├── ts-05.parquet
190- ├── ts-06.parquet
191- ├── ts-07.parquet
192- ├── ts-08.parquet
193- ├── ts-09.parquet
194- ├── ts-10.parquet
195- └── ts-11.parquet
183+ tmpdir
184+ ├── ts-00.parquet
185+ ├── ts-01.parquet
186+ ├── ts-02.parquet
187+ ├── ts-03.parquet
188+ ├── ts-04.parquet
189+ ├── ts-05.parquet
190+ ├── ts-06.parquet
191+ ├── ts-07.parquet
192+ ├── ts-08.parquet
193+ ├── ts-09.parquet
194+ ├── ts-10.parquet
195+ └── ts-11.parquet
196196
197197Now we'll implement an out-of-core :meth:`pandas.Series.value_counts`. The peak memory usage of this
198198workflow is the single largest chunk, plus a small series storing the unique value
@@ -202,13 +202,18 @@ work for arbitrary-sized datasets.
202202.. ipython:: python
203203
204204 %% time
205- files = pathlib.Path( " data/timeseries/ " ).glob( " ts*.parquet" )
205+ files = glob.iglob( f " { tmpdir.name } / ts*.parquet" )
206206 counts = pd.Series(dtype = int )
207207 for path in files:
208208 df = pd.read_parquet(path)
209209 counts = counts.add(df[" name" ].value_counts(), fill_value = 0 )
210210 counts.astype(int )
211211
212+ .. ipython:: python
213+ :suppress:
214+
215+ tmpdir.cleanup()
216+
212217 Some readers, like :meth:`pandas.read_csv`, offer parameters to control the
213218``chunksize`` when reading a single file.
214219
0 commit comments