
Commit c862f80

RandySheriff authored and facebook-github-bot committed
Minor fix on TAO op to support lowering
Summary: Fix a few functionality gaps so the TAO operator works during AOTI lowering.

Differential Revision: D82492826
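For context, AOTI (AOT Inductor) lowering compiles an exported program ahead of time. Below is a minimal sketch of that flow, assuming a recent PyTorch where torch._inductor.aoti_compile_and_package accepts an ExportedProgram; the Linear model is only a placeholder for a torchao-quantized model:

import torch

# Placeholder model; a torchao-quantized/sparse model would be
# exported and lowered the same way.
model = torch.nn.Linear(4, 4).eval()
example_inputs = (torch.randn(2, 4),)

# Export to an ExportedProgram, then compile ahead of time with AOT Inductor.
ep = torch.export.export(model, example_inputs)
package_path = torch._inductor.aoti_compile_and_package(ep)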
1 parent 18dbe87 commit c862f80

2 files changed: +22 additions, -10 deletions

torchao/dtypes/floatx/cutlass_semi_sparse_layout.py

Lines changed: 8 additions & 5 deletions
@@ -106,12 +106,12 @@ def __torch_dispatch__(cls, func, types, args, kwargs):
             )
         elif func is aten.to.dtype_layout:
             dense, scale, _ = args[0].get_plain()
-            dense = dense.to(
+            product = dense.to(torch.float) * scale.to(torch.float)
+            return product.to(
                 *args[1:],
                 dtype=kwargs.get("dtype", dense.dtype),
                 device=kwargs.get("device", dense.device),
             )
-            return scale * dense
 
         raise NotImplementedError(
             f"CutlassSemiSparseTensorImpl dispatch: attempting to run {func}, this is not supported"
@@ -135,11 +135,14 @@ def get_plain(self):
         # semi-structured format, so multiplying with identity matrix,
         # and using identity scale factors, for the conversion.
         cols = self.shape[1]
-        input = torch.eye(cols, dtype=self.sparse.dtype, device=self.sparse.device)
-        input_scale = torch.ones(
-            (cols,), dtype=self.scale.dtype, device=self.sparse.device
+        input_fp16 = torch.eye(cols, dtype=torch.float16, device=self.sparse.device)
+        input = input_fp16.to(dtype=self.sparse.dtype)
+        input_scale_fp16 = torch.ones(
+            (cols,), dtype=torch.float16, device=self.sparse.device
         )
+        input_scale = input_scale_fp16.to(dtype=self.scale.dtype)
         sparse_scale = torch.ones_like(self.scale)
+
         out_dtype = torch.bfloat16
         dense = (
             rowwise_scaled_linear_sparse_cutlass_f8f8(
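The construct-then-cast change above is presumably needed because factory ops such as torch.eye and torch.ones do not lower cleanly when asked to produce the sparse/scale dtypes directly during AOTI compilation; building in float16 and casting afterwards sidesteps that. A small sketch of the pattern, with torch.float8_e4m3fn as an assumed stand-in for the actual sparse dtype:

import torch

# Construct-then-cast: build the identity and unit scales in float16,
# then cast to the target dtypes, instead of constructing in those
# dtypes directly.
cols = 4
input_ = torch.eye(cols, dtype=torch.float16).to(dtype=torch.float8_e4m3fn)
input_scale = torch.ones((cols,), dtype=torch.float16).to(dtype=torch.float32)
print(input_.dtype, input_scale.dtype)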

torchao/quantization/linear_activation_quantized_tensor.py

Lines changed: 14 additions & 5 deletions
@@ -133,11 +133,15 @@ def _same_metadata(
 
 @implements([torch.nn.functional.linear, aten.linear.default])
 def _(func, types, args, kwargs):
-    input_tensor, weight_tensor, bias = (
-        args[0],
-        args[1],
-        args[2] if len(args) > 2 else None,
-    )
+    if len(args) > 1:
+        input_tensor, weight_tensor, bias = (
+            args[0],
+            args[1],
+            args[2] if len(args) > 2 else None,
+        )
+    else:
+        input_tensor, weight_tensor, bias = kwargs["input"], kwargs["weight"], kwargs["bias"] if "bias" in kwargs else None
+
     if isinstance(weight_tensor, LinearActivationQuantizedTensor):
         return weight_tensor._quantized_linear_op(input_tensor, weight_tensor, bias)

@@ -216,6 +220,11 @@ def _(func, types, args, kwargs):
         for tensor_name in self_tensors:
             getattr(self, tensor_name).copy_(getattr(src, tensor_name))
         return
+    elif type(self) is torch.Tensor and type(src) is LinearActivationQuantizedTensor:
+        new_src = src.to(dtype=self.dtype, device=self.device)
+        self.copy_(new_src)
+        return
+
     raise ValueError(
         f"Not supported args for copy_ due to metadata mistach: {args[0], args[1]}"
     )
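The added elif handles copying a LinearActivationQuantizedTensor into a plain torch.Tensor: the source is first materialized at the destination's dtype and device via .to() (which dequantizes, per the dtype_layout path fixed above), then copied with a regular copy_. A generic sketch of the pattern, with an ordinary bfloat16 tensor standing in for the subclass:

import torch

# Materialize the source at the destination's dtype/device, then do a
# plain copy_.
dst = torch.empty(2, 3, dtype=torch.float32)
src = torch.ones(2, 3, dtype=torch.bfloat16)

dst.copy_(src.to(dtype=dst.dtype, device=dst.device))
print(dst.dtype, dst.sum().item())  # torch.float32 6.0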
