Commit 0f48b0d

Update on "[dynamo][pr_time_benchmark] Add dynamo benchmark to stress test inlining"
cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx chenyang78 kadeng chauhang amjames [ghstack-poisoned]
1 parent efa51e8 commit 0f48b0d
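What the benchmark stresses: with inlining of inbuilt nn.Module calls, Dynamo traces into each submodule's forward() instead of treating the module call as a black box, so compile time scales with the number of nested module calls. A minimal sketch (not part of this commit) of that behavior:

import torch
import torch.nn as nn

# Sketch: Dynamo traces *into* every submodule's forward() while compiling,
# so deeply nested module trees multiply the front-end tracing work.
net = nn.Sequential(nn.ReLU(), nn.Sequential(nn.ReLU(), nn.ReLU()))
compiled = torch.compile(net, backend="eager")
compiled(torch.ones(10))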

File tree

1 file changed: +90 −0 lines changed

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
import sys

from benchmark_base import BenchmarkBase

import torch
import torch.nn as nn
from torch._inductor.utils import fresh_inductor_cache


# A recursively nested module: each level holds `width` child modules,
# bottoming out in ReLU leaves, so the total module count grows
# exponentially with `depth`.
class NestedModule(nn.Module):
    def __init__(self, depth=3, width=4):
        super().__init__()
        self.depth = depth
        self.width = width

        self.relu_a = nn.ReLU()
        self.relu_b = nn.ReLU()

        sub_mods = []
        if depth > 0:
            for i in range(width):
                sub_mods.append(NestedModule(depth - 1, width))
        else:
            for i in range(width):
                sub_mods.append(nn.ReLU())
        self.sub_mods = nn.Sequential(*sub_mods)
        self.a = 2

    def forward(self, x):
        x = self.relu_a(x)
        x = x + self.sub_mods(x)
        return x + self.relu_b(x) + self.a


class Benchmark(BenchmarkBase):
    def __init__(
        self,
        ModuleClass,
        backend="eager",
        is_gpu=False,
        dynamic=False,
    ):
        self.ModuleClass = ModuleClass
        self._name = ModuleClass.__name__
        self._is_gpu = is_gpu

        super().__init__(
            category="basic",
            backend=backend,
            device="cuda" if self._is_gpu else "cpu",
            dynamic=dynamic,
        )

    def name(self):
        prefix = f"{self.category()}_{self._name}_{self.backend()}"
        return prefix

    def _prepare_once(self):
        self.m = self.ModuleClass()
        torch.set_float32_matmul_precision("high")
        self.input = torch.ones(10, device=self.device())

    def _prepare(self):
        torch._dynamo.reset()

    def _work(self):
        # enable_cpp_symbolic_shape_guards has an impact on this benchmark;
        # keep using the False value for consistency.
        with (
            fresh_inductor_cache(),
        ):
            opt_m = torch.compile(backend=self.backend(), dynamic=self.is_dynamic())(
                self.m.cuda() if self._is_gpu else self.m
            )
            opt_m(self.input)


def main():
    result_path = sys.argv[1]
    benchmarks = [
        Benchmark(NestedModule),
    ]
    for b in benchmarks:
        b.enable_compile_time_instruction_count().collect_all().append_results(
            result_path
        )


if __name__ == "__main__":
    main()
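
For scale, a quick sketch (assuming the NestedModule class above is in scope) of how many nn.Module instances the default configuration builds, which is what makes this a stress test for inlining:

# Sketch, not part of the commit: count the modules Dynamo must inline through.
m = NestedModule(depth=3, width=4)
print(sum(1 for _ in m.modules()))  # 596: 85 NestedModule, 85 Sequential, 426 ReLU

To collect results, main() expects the output path as its first command-line argument, e.g. python <benchmark_file>.py results.csv (the new file's name is not shown in this diff), run from within the pr_time_benchmarks harness so that benchmark_base is importable.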
