
Commit 8353c20

Add support for AQTStorage and PlainAQTStorage
Summary:
Today `AffineQuantizedTensor` has a hardcoded storage format of `int_data`, `scale`, and `zero_point`, which does not work if we want to support packed weights. This PR hides the storage details of `AffineQuantizedTensor` behind a family of tensor subclasses that all inherit from a base storage type: `AQTStorage` (affine quantized tensor storage).

This PR only adds a plain storage tensor (`PlainAQTStorage`) that stores the `int_data`, `scale`, and `zero_point` tensors directly; a follow-up PR will add a different type of `AQTStorage` that stores a packed weight (the result of `torch.ops.aten._convert_weight_to_int4pack`).

`AffineQuantizedTensor` will have the following:
- storage_tensor: AQTStorage (can store data in different storage formats)
- storage_layout: str (a string naming the type of storage_tensor we have; can be used in dispatch)

Test Plan:
python test/quantization/test_quant_api.py

Reviewers:

Subscribers:

Tasks:

Tags:
1 parent: 90b5e17 · commit: 8353c20
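For orientation, here is a minimal, simplified sketch of the storage split described in the commit message. It is not the actual torchao implementation (the real classes are torch.Tensor subclasses with full op dispatch), and the "plain" layout string is an assumption; only the class and field names come from the summary above.

import torch


class AQTStorage:
    """Base type for affine quantized tensor storage formats."""
    pass


class PlainAQTStorage(AQTStorage):
    """Plain storage: keeps int_data, scale and zero_point as separate tensors."""

    storage_layout = "plain"  # assumed name for the plain format

    def __init__(self, int_data, scale, zero_point):
        self.int_data = int_data
        self.scale = scale
        self.zero_point = zero_point


class AffineQuantizedTensor:
    """Holds an AQTStorage instead of hardcoded int_data/scale/zero_point fields."""

    def __init__(self, storage_tensor: AQTStorage):
        self.storage_tensor = storage_tensor

    @property
    def storage_layout(self) -> str:
        # String naming the storage format; usable as a dispatch key, e.g. to pick
        # a packed-weight kernel once a "tensor_core_tiled" storage is added.
        return self.storage_tensor.storage_layout


# Usage: wrap per-tensor int8 data in the plain storage format.
int_data = torch.randint(-128, 128, (32, 64), dtype=torch.int8)
plain = PlainAQTStorage(int_data, scale=torch.tensor(0.1), zero_point=torch.tensor(0))
print(AffineQuantizedTensor(plain).storage_layout)  # "plain"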

File tree

2 files changed: +259 −53 lines


test/quantization/test_quant_api.py

Lines changed: 5 additions & 5 deletions
@@ -106,8 +106,8 @@ def __init__(self, m=64, n=32, k=64):
         self.linear1 = torch.nn.Linear(m, n, bias=False).to(torch.float)
         self.linear2 = torch.nn.Linear(n, k, bias=False).to(torch.float)
 
-    def example_inputs(self, batch_size=1):
-        return (torch.randn(batch_size, self.linear1.in_features).to(torch.float),)
+    def example_inputs(self, batch_size=1, dtype=torch.float, device="cpu"):
+        return (torch.randn(batch_size, self.linear1.in_features, dtype=dtype, device=device),)
 
     def forward(self, x):
         x = self.linear1(x)
@@ -482,10 +482,10 @@ def test_quantized_tensor_subclass_int4(self):
         # use 1024 so that we don't need padding
         m = ToyLinearModel(1024, 1024, 1024).eval().to(torch.bfloat16).to("cuda")
         m_copy = copy.deepcopy(m)
-        example_inputs = tuple(map(lambda x: x.to(torch.bfloat16).to("cuda"), m.example_inputs()))
+        example_inputs = m.example_inputs(dtype=torch.bfloat16, device="cuda")
 
         def apply_weight_quant(weight):
-            return to_aq(weight, mapping_type, block_size, target_dtype, quant_min, quant_max, eps, zero_point_dtype=zero_point_dtype, preserve_zero=preserve_zero, zero_point_domain=zero_point_domain)
+            return to_aq(weight, mapping_type, block_size, target_dtype, quant_min, quant_max, eps, zero_point_dtype=zero_point_dtype, preserve_zero=preserve_zero, zero_point_domain=zero_point_domain, extended_layout="tensor_core_tiled")
 
         m = quantize(m, apply_weight_quant)
         assert isinstance(m.linear1.weight, AffineQuantizedTensor)
@@ -562,7 +562,7 @@ def get_per_token_block_size(x):
         m = ToyLinearModel(1024, 1024, 1024).eval().to(torch.bfloat16).to("cuda")
         m_copy = copy.deepcopy(m)
         # setting batch_size to 20 to be compatible with the kernel
-        example_inputs = tuple(map(lambda x: x.to(torch.bfloat16).to("cuda"), m.example_inputs(batch_size=20)))
+        example_inputs = m.example_inputs(batch_size=20, dtype=torch.bfloat16, device="cuda")
 
         def apply_weight_quant(weight):
             block_size = get_weight_block_size(weight)

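The extended_layout="tensor_core_tiled" argument in the test change above selects a storage format by name. Purely as an illustration of how such a storage_layout string could drive dispatch (none of the names below are torchao APIs; this is a hypothetical sketch under that assumption), a constructor registry keyed by layout string might look like this:

from typing import Callable, Dict

_STORAGE_CONSTRUCTORS: Dict[str, Callable] = {}


def register_aqt_storage(layout: str) -> Callable:
    """Associate a storage constructor with an extended_layout string."""
    def decorator(fn: Callable) -> Callable:
        _STORAGE_CONSTRUCTORS[layout] = fn
        return fn
    return decorator


@register_aqt_storage("plain")
def _make_plain(int_data, scale, zero_point):
    # The plain format keeps the three tensors as-is.
    return {"layout": "plain", "int_data": int_data, "scale": scale, "zero_point": zero_point}


def make_storage(extended_layout: str, *args):
    # A follow-up PR could register a "tensor_core_tiled" entry that packs the
    # weight (e.g. via torch.ops.aten._convert_weight_to_int4pack) before storing it.
    return _STORAGE_CONSTRUCTORS[extended_layout](*args)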