4040
4141
4242class ToyLinearModel (torch .nn .Module ):
43- def __init__ (self , in_features , out_features ):
43+ def __init__ (self , in_features , out_features , bias ):
4444 super ().__init__ ()
45- self .linear1 = torch .nn .Linear (in_features , out_features , bias = False )
46- self .linear2 = torch .nn .Linear (out_features , in_features , bias = False )
45+ self .linear1 = torch .nn .Linear (in_features , out_features , bias = bias )
46+ self .linear2 = torch .nn .Linear (out_features , in_features , bias = bias )
4747
4848 def forward (self , x ):
4949 x = self .linear1 (x )
@@ -104,6 +104,8 @@ def setUp(self):
104104 ((32 , 128 ), 256 , 512 ),
105105 ],
106106 )
107+ @common_utils .parametrize ("bias" , [False , True ])
108+ @torch .no_grad ()
107109 def test_fp8_linear_variants (
108110 self ,
109111 dtype : torch .dtype ,
@@ -112,6 +114,7 @@ def test_fp8_linear_variants(
112114 granularity ,
113115 kernel_preference : KernelPreference ,
114116 sizes : Tuple ,
117+ bias : bool ,
115118 ):
116119 if isinstance (granularity , PerTensor ):
117120 if kernel_preference is KernelPreference .FBGEMM :
@@ -132,6 +135,16 @@ def test_fp8_linear_variants(
132135 ):
133136 return unittest .skip ("unimplemented" )
134137
138+ if bias is True :
139+ sizes_to_keep = ((128 ,), 256 , 128 )
140+ if (
141+ sizes != sizes_to_keep
142+ or kernel_preference is not KernelPreference .TORCH
143+ ):
144+ return unittest .skip (
145+ "cut down on number of options to save test time"
146+ )
147+
135148 error_message = None
136149 if isinstance (granularity , PerRow ):
137150 if mode == "dynamic" and dtype != torch .bfloat16 :
@@ -160,7 +173,7 @@ def test_fp8_linear_variants(
160173 input_tensor = torch .randn (* M , K , dtype = dtype , device = "cuda" )
161174
162175 # Create a linear layer with bfloat16 dtype
163- model = ToyLinearModel (K , N ).eval ().to (dtype ).to ("cuda" )
176+ model = ToyLinearModel (K , N , bias ).eval ().to (dtype ).to ("cuda" )
164177
165178 quantized_model = copy .deepcopy (model )
166179
@@ -362,7 +375,7 @@ def test_kernel_preference_numerical_equivalence(self, granularity, sizes):
362375 dtype = torch .bfloat16
363376 input_tensor = torch .randn (* M , K , dtype = dtype , device = "cuda" )
364377 # Create a linear layer with bfloat16 dtype
365- model = ToyLinearModel (K , N ).eval ().to (dtype ).to ("cuda" )
378+ model = ToyLinearModel (K , N , bias = False ).eval ().to (dtype ).to ("cuda" )
366379
367380 # reference kernel preference and results
368381 # we are using KernelPreference .TORCH as the reference
0 commit comments