Skip to content

Commit ff68bb3

Browse files
committed
add some bug fixes
1 parent 3537a22 commit ff68bb3

File tree

4 files changed

+15
-8
lines changed

4 files changed

+15
-8
lines changed

torchao/dtypes/affine_quantized_tensor_ops.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,9 @@ def _(func, types, args, kwargs):
456456
def _(func, types, args, kwargs):
457457
self = args[0]
458458
src = args[1]
459+
if type(self) is torch.Tensor and isinstance(src, AffineQuantizedTensor):
460+
func(self, src.dequantize())
461+
return
459462
if _same_metadata(self, src):
460463
self_tensors = self.__tensor_flatten__()[0]
461464
for tensor_name in self_tensors:

torchao/quantization/quant_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ def quantize_(
485485
or ("_default" in config.fqn_to_config and _is_linear(module))
486486
):
487487
module_name = (
488-
module_fqn.rsplit(".", 1) if "." in module_fqn else module_fqn
488+
module_fqn.rsplit(".", 1)[0] if "." in module_fqn else module_fqn
489489
)
490490
# this replaces inplace, so no need to reassign
491491
_fqn_to_config_handler(module, module_name, config)

torchao/quantization/quantize_/workflows/float8/float8_tensor.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,14 @@ def from_hp(
202202
else:
203203
maybe_hp_value_ub_tensor = None
204204
if isinstance(granularity, PerRow):
205-
data, scale = torch.ops.triton.quantize_fp8_row(
206-
hp_tensor, scale_ub=maybe_hp_value_ub_tensor
207-
)
208-
scale_shape = []
209-
for i in range(hp_tensor.ndim):
210-
scale_shape.append(hp_tensor.shape[i] // block_size[i])
211-
scale = scale.reshape(*scale_shape)
205+
with torch.cuda.device(hp_tensor.device):
206+
data, scale = torch.ops.triton.quantize_fp8_row(
207+
hp_tensor, scale_ub=maybe_hp_value_ub_tensor
208+
)
209+
scale_shape = []
210+
for i in range(hp_tensor.ndim):
211+
scale_shape.append(hp_tensor.shape[i] // block_size[i])
212+
scale = scale.reshape(*scale_shape)
212213
else:
213214
assert isinstance(granularity, PerTensor), (
214215
f"Expected per tensor, got {granularity}"

torchao/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,9 @@ def _same_metadata(self: TorchAOBaseTensor, src: TorchAOBaseTensor) -> bool:
571571
def _(func, types, args, kwargs):
572572
self = args[0]
573573
src = args[1]
574+
if type(self) is torch.Tensor and isinstance(src, TorchAOBaseTensor):
575+
func(self, src.dequantize())
576+
return
574577
if _same_metadata(self, src):
575578
self_tensors = self.__tensor_flatten__()[0]
576579
for tensor_name in self_tensors:

0 commit comments

Comments (0)