|
1 | | -import argparse |
2 | | -import itertools |
3 | 1 | import os |
4 | | -import shutil |
5 | 2 | import subprocess |
6 | | -import time |
7 | | -from datetime import datetime |
8 | | -from pathlib import Path |
9 | | -from typing import List |
10 | | - |
11 | | -import yaml |
12 | | -from git import Repo |
13 | | - |
14 | | -from ..utils import dump_output, get_output_dir, get_output_json |
15 | | -from .result_analyzer import analyze |
16 | 3 |
|
17 | | -# Expected WORK_DIR structure |
18 | | -# WORK_DIR/ |
19 | | -# |---examples/ |
20 | | -# |---pytorch-<ver1>-cuda<ver1>/ |
21 | | -# |---run.sh |
22 | | -# |---mnist/ |
23 | | -# |---mnist-hogwild/ |
24 | | -# |---<other-benchmarks> |
25 | | -# |---pytorch-<ver2>-cuda<ver2>/ |
26 | | -# |---summary.csv |
| 4 | +from typing import List |
27 | 5 |
|
28 | 6 | BM_NAME = "release-test" |
29 | 7 | EXAMPLE_URL = "https://github.com/pytorch/examples.git" |
30 | 8 | CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) |
31 | | -DEFAULT_CONFIG_PATH = os.path.join( |
32 | | - os.path.dirname(os.path.abspath(__file__)), "configs" |
33 | | -) |
34 | | -RUN_TEMPLATE = """ |
35 | | -# GENERATED BY userbenchmark/release-test/__init__.py. DO NOT EDIT! |
36 | | -bash {RELEASE_TEST_ROOT}/setup_env.sh '{CUDA_VERSION}' '{MAGMA_VERSION}' '{PYTORCH_VERSION}' '{PYTORCH_CHANNEL}' '{WORK_DIR}' |
37 | | -bash {RELEASE_TEST_ROOT}/run_release_test.sh '{CUDA_VERSION}' '{RESULT_DIR}' |
38 | | -""" |
39 | | - |
40 | | - |
41 | | -def get_timestamp(): |
42 | | - return datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S") |
43 | | - |
44 | | - |
45 | | -def get_work_dir(output_dir): |
46 | | - work_dir = output_dir.joinpath(f"run-{get_timestamp()}") |
47 | | - work_dir.mkdir(exist_ok=True, parents=True) |
48 | | - return work_dir |
49 | | - |
50 | | - |
51 | | -def generate_test_scripts(config, work_dir): |
52 | | - assert "cuda" in config and isinstance( |
53 | | - config["cuda"], list |
54 | | - ), f"Expected CUDA config list, but not found." |
55 | | - assert "pytorch" in config and isinstance( |
56 | | - config["pytorch"], list |
57 | | - ), f"Exptected pytorch version list, but not found." |
58 | | - bm_matrix = [config["cuda"], config["pytorch"]] |
59 | | - run_scripts = {} |
60 | | - for cuda, pytorch in itertools.product(*bm_matrix): |
61 | | - run_key = f"pytorch-{pytorch['version']}-cuda-{cuda['version']}" |
62 | | - run_script = RUN_TEMPLATE.format( |
63 | | - RELEASE_TEST_ROOT=CURRENT_DIR, |
64 | | - CUDA_VERSION=cuda["version"], |
65 | | - MAGMA_VERSION=cuda["magma_version"], |
66 | | - PYTORCH_VERSION=pytorch["version"], |
67 | | - PYTORCH_CHANNEL=pytorch["conda_channel"], |
68 | | - WORK_DIR=work_dir, |
69 | | - RESULT_DIR=work_dir.joinpath(run_key), |
70 | | - ) |
71 | | - run_scripts[run_key] = run_script |
72 | | - return run_scripts |
73 | | - |
74 | | - |
75 | | -def dump_test_scripts(run_scripts, work_dir): |
76 | | - for run_key, run_script in run_scripts.items(): |
77 | | - run_script_loc = work_dir.joinpath(run_key) |
78 | | - run_script_loc.mkdir(exist_ok=True) |
79 | | - with open(run_script_loc.joinpath("run.sh"), "w") as rs: |
80 | | - rs.write(run_script) |
81 | | - |
82 | | - |
83 | | -def dump_result_to_json(metrics): |
84 | | - result = get_output_json(BM_NAME, metrics) |
85 | | - dump_output(BM_NAME, result) |
86 | | - |
87 | | - |
88 | | -def run_benchmark(run_scripts, work_dir): |
89 | | - for run_key, _rscript in run_scripts.items(): |
90 | | - run_script_path = work_dir.joinpath(run_key, "run.sh") |
91 | | - # run the benchmark |
92 | | - print(f"Running benchmark {run_key} ...") |
93 | | - subprocess.check_call(["bash", str(run_script_path)]) |
94 | | - |
95 | | - |
96 | | -def get_config(config_name: str): |
97 | | - if os.path.exists(os.path.join(DEFAULT_CONFIG_PATH, config_name)): |
98 | | - config_name = os.path.join(DEFAULT_CONFIG_PATH, config_name) |
99 | | - elif os.path.exists(os.path.join(DEFAULT_CONFIG_PATH, f"{config_name}.yaml")): |
100 | | - config_name = os.path.join(DEFAULT_CONFIG_PATH, f"{config_name}.yaml") |
101 | | - else: |
102 | | - raise ValueError( |
103 | | - f"Can't find config name {config_name} in config path {DEFAULT_CONFIG_PATH}." |
104 | | - ) |
105 | | - with open(config_name, "r") as yfile: |
106 | | - config = yaml.safe_load(yfile) |
107 | | - return config |
108 | | - |
109 | | - |
110 | | -def parse_args(args): |
111 | | - parser = argparse.ArgumentParser() |
112 | | - parser.add_argument( |
113 | | - "--config", "-c", default="1.12.1", type=str, help="Config for release testing" |
114 | | - ) |
115 | | - parser.add_argument( |
116 | | - "--dry-run", |
117 | | - action="store_true", |
118 | | - help="Only generate the test scripts. Do not run the benchmark.", |
119 | | - ) |
120 | | - parser.add_argument( |
121 | | - "--analyze", |
122 | | - type=str, |
123 | | - help="Only analyze the result of the specified work directory.", |
124 | | - ) |
125 | | - args = parser.parse_args(args) |
126 | | - return args |
127 | | - |
128 | | - |
129 | | -def prepare_release_tests(args: argparse.Namespace, work_dir: Path): |
130 | | - config = get_config(args.config) |
131 | | - run_scripts = generate_test_scripts(config, work_dir) |
132 | | - dump_test_scripts(run_scripts, work_dir) |
133 | | - # clone the examples repo |
134 | | - Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) |
135 | | - return run_scripts |
136 | | - |
137 | | - |
138 | | -def cleanup_release_tests(work_dir): |
139 | | - examples_path = work_dir.joinpath("examples") |
140 | | - if examples_path.exists(): |
141 | | - shutil.rmtree(examples_path) |
142 | 9 |
|
143 | 10 |
|
144 | 11 | def run(args: List[str]): |
145 | | - args = parse_args(args) |
146 | | - if args.analyze: |
147 | | - analyze(args.analyze) |
148 | | - return |
149 | | - work_dir = get_work_dir(get_output_dir(BM_NAME)) |
150 | | - run_scripts = prepare_release_tests(args=args, work_dir=work_dir) |
151 | | - if not args.dry_run: |
152 | | - run_benchmark(run_scripts, work_dir) |
153 | | - metrics = analyze(work_dir) |
154 | | - dump_result_to_json(metrics) |
155 | | - cleanup_release_tests(work_dir) |
| 12 | + subprocess.check_call(["bash", f"{CURRENT_DIR}/run_release_test.sh"]) |
0 commit comments