|
19 | 19 | import matplotlib.dates as mdates |
20 | 20 | import matplotlib.pyplot as plt |
21 | 21 | import numpy as np |
| 22 | +import tqdm.notebook |
22 | 23 | import uproot |
23 | 24 |
|
24 | 25 |
|
@@ -311,13 +312,16 @@ def extract_metadata(fname_and_treename: str, custom_func) -> dict: |
311 | 312 | meta.update({"custom_meta": custom_func(f)}) |
312 | 313 | return {fname: meta} |
313 | 314 |
|
314 | | - preprocess_input = dask.bag.from_sequence(files_to_preprocess, partition_size=1) |
315 | 315 | print(f"pre-processing {len(files_to_preprocess)} file(s)") |
316 | | - futures = preprocess_input.map(functools.partial(extract_metadata, custom_func=custom_func)) |
317 | | - result = client.compute(futures).result() |
| 316 | + tasks = client.map(functools.partial(extract_metadata, custom_func=custom_func), files_to_preprocess) |
| 317 | + futures = client.compute(tasks) |
| 318 | + |
| 319 | + with tqdm.notebook.tqdm(total=len(futures)) as pbar: |
| 320 | + for _ in dask.distributed.as_completed(futures): |
| 321 | + pbar.update(1) |
318 | 322 |
|
319 | 323 | # turn into dict for easier use |
320 | | - result_dict = {k: v for res in result for k, v in res.items()} |
| 324 | + result_dict = {k: v for res in [f.result() for f in futures] for k, v in res.items()} |
321 | 325 |
|
322 | 326 | # join back together per-file information with fileset-level information and turn into WorkItem list for coffea |
323 | 327 | workitems = [] |
@@ -410,5 +414,13 @@ def sum_output(a, b): |
410 | 414 | ) |
411 | 415 |
|
412 | 416 | workitems_bag = dask.bag.from_sequence(workitems, partition_size=1) |
413 | | - futures = workitems_bag.map(run_analysis).fold(sum_output) |
414 | | - return client.compute(futures).result() |
| 417 | + tasks = workitems_bag.map(run_analysis).to_delayed() |
| 418 | + futures = client.compute(tasks) |
| 419 | + workitems_bag = dask.bag.from_delayed(futures) |
| 420 | + res = client.compute(workitems_bag.fold(sum_output)) |
| 421 | + |
| 422 | + with tqdm.notebook.tqdm(total=len(futures)) as pbar: |
| 423 | + for _ in dask.distributed.as_completed(futures): |
| 424 | + pbar.update(1) |
| 425 | + |
| 426 | + return res.result() |
0 commit comments