diff --git a/torchao/quantization/__init__.py b/torchao/quantization/__init__.py index de2462106c..31757e7ee6 100644 --- a/torchao/quantization/__init__.py +++ b/torchao/quantization/__init__.py @@ -46,11 +46,11 @@ "int4_weight_only", "int8_weight_only", "uintx_weight_only", - "float8_weight_only", "fpx_weight_only", "LinearActivationQuantizedTensor", "to_linear_activation_quantized", "to_weight_tensor_with_linear_activation_scale_metadata", "float8_weight_only", - "float8_dynamic_activation_float8_weight" + "float8_dynamic_activation_float8_weight", + "float8_static_activation_float8_weight" ] diff --git a/torchao/quantization/quant_api.py b/torchao/quantization/quant_api.py index 6b41dc62c6..6c41425062 100644 --- a/torchao/quantization/quant_api.py +++ b/torchao/quantization/quant_api.py @@ -93,6 +93,7 @@ "uintx_weight_only", "fpx_weight_only", "float8_dynamic_activation_float8_weight", + "float8_static_activation_float8_weight", ] from .GPTQ import (