@@ -20,7 +20,12 @@ def forward(self, input):
2020 return torch .ops .aten .nonzero .default (input )
2121
2222
23- class NonDDSModel (torch .nn .Module ):
23+ class DDSOpWithReductionOpModel (torch .nn .Module ):
24+ """
25+ DDSOpWithReductionOpModel is a model that contains a DDS op followed by a reduction op.
26+ Because nonzero requires an output allocator, this model uses the output allocator by default.
27+ """
28+
2429 def forward (self , inputs ):
2530 out = torch .ops .aten .nonzero .default (inputs )
2631 out = torch .ops .aten .sum .dim_IntList (out , 0 )
@@ -251,9 +256,9 @@ def test_combination_of_cg_and_oa(self, _, use_python_runtime):
251256 out = cudagraphs_module (* inputs )
252257
253258
254- class TestOutputAllocatorNonDDSModel (TestCase ):
259+ class TestOutputAllocatorDDSOpWithReductionOpModel (TestCase ):
255260 """
256- The NonDDSModel is a model that contains DDS op + reduction op.
261+ The DDSOpWithReductionOpModel is a model that contains DDS op + reduction op.
257262 """
258263
259264 @parameterized .expand (
@@ -263,7 +268,7 @@ class TestOutputAllocatorNonDDSModel(TestCase):
263268 ]
264269 )
265270 def test_cudagraphs_and_output_allocator (self , _ , use_python_runtime ):
266- model = NonDDSModel ().eval ().cuda ()
271+ model = DDSOpWithReductionOpModel ().eval ().cuda ()
267272 inputs = (torch .randint (low = 0 , high = 3 , size = (10 ,), dtype = torch .int ).to ("cuda" ),)
268273 compiled_model = torch_tensorrt .compile (
269274 model ,
@@ -302,9 +307,9 @@ def test_cudagraphs_and_output_allocator(self, _, use_python_runtime):
302307 )
303308 def test_default (self , _ , use_python_runtime ):
304309 """
305- NonDDS models use standard execution with cudagraphs=False by default .
310+ The DDSOpWithReductionOpModel contains a nonzero op followed by a reduction op; the nonzero op requires an output allocator.
306311 """
307- model = NonDDSModel ().eval ().cuda ()
312+ model = DDSOpWithReductionOpModel ().eval ().cuda ()
308313 inputs = (torch .randint (low = 0 , high = 3 , size = (10 ,), dtype = torch .int ).to ("cuda" ),)
309314 compiled_model = torch_tensorrt .compile (
310315 model ,
@@ -313,11 +318,11 @@ def test_default(self, _, use_python_runtime):
313318 min_block_size = 1 ,
314319 use_python_runtime = use_python_runtime ,
315320 )
316- standard_out = compiled_model (* inputs )
321+ oa_out = compiled_model (* inputs )
317322 ref_out = model (* inputs )
318323
319324 self .assertAlmostEqual (
320- float (torch .max (torch .abs (ref_out - standard_out ))),
325+ float (torch .max (torch .abs (ref_out - oa_out ))),
321326 0 ,
322327 DECIMALS_OF_AGREEMENT ,
323328 msg = "Default Output Allocator runtime outputs don't match with the original model." ,
@@ -330,7 +335,7 @@ def test_default(self, _, use_python_runtime):
330335 ]
331336 )
332337 def test_combination_of_cg_and_oa (self , _ , use_python_runtime ):
333- model = NonDDSModel ().eval ().cuda ()
338+ model = DDSOpWithReductionOpModel ().eval ().cuda ()
334339 inputs = (torch .randint (low = 0 , high = 3 , size = (10 ,), dtype = torch .int ).to ("cuda" ),)
335340 compiled_model = torch_tensorrt .compile (
336341 model ,
0 commit comments