From 7acf6fa59c755db47e248a0d1e77a038ed7874fe Mon Sep 17 00:00:00 2001 From: andrewor14 Date: Fri, 12 Sep 2025 12:43:44 -0700 Subject: [PATCH 1/3] Deprecate config functions like `int4_weight_only` **Summary:** These have been superseded by `AOBaseConfig` objects for several releases already, but we never deprecated them. We will keep them around for another release before breaking BC and removing them. **Test Plan:** ``` python test/quantization/test_quant_api.py -k test_config_deprecation ``` [ghstack-poisoned] --- test/quantization/test_quant_api.py | 45 +++++++++++++++++++++++++++++ torchao/quantization/quant_api.py | 27 ++++++++++++----- torchao/utils.py | 21 +++++++++++++- 3 files changed, 84 insertions(+), 9 deletions(-) diff --git a/test/quantization/test_quant_api.py b/test/quantization/test_quant_api.py index 9f0a1dd001..f8f339db42 100644 --- a/test/quantization/test_quant_api.py +++ b/test/quantization/test_quant_api.py @@ -10,6 +10,7 @@ import gc import tempfile import unittest +import warnings from pathlib import Path import torch @@ -752,6 +753,50 @@ def test_int4wo_cuda_serialization(self): # load state_dict in cuda model.load_state_dict(sd, assign=True) + def test_config_deprecation(self): + """ + Test that old config functions like `int4_weight_only` trigger deprecation warnings. 
+ """ + from torchao.quantization import ( + float8_weight_only, + fpx_weight_only, + gemlite_uintx_weight_only, + int4_weight_only, + int8_dynamic_activation_int4_weight, + int8_dynamic_activation_int8_weight, + int8_weight_only, + uintx_weight_only, + ) + + # Reset deprecation warning state, otherwise we won't log warnings here + warnings.resetwarnings() + + # Map from deprecated API to the args needed to instantiate it + deprecated_apis_to_args = { + float8_weight_only: (), + fpx_weight_only: (3, 2), + gemlite_uintx_weight_only: (), + int4_weight_only: (), + int8_dynamic_activation_int4_weight: (), + int8_dynamic_activation_int8_weight: (), + int8_weight_only: (), + uintx_weight_only: (torch.uint4,), + } + + with warnings.catch_warnings(record=True) as _warnings: + # Call each deprecated API twice + for cls, args in deprecated_apis_to_args.items(): + cls(*args) + cls(*args) + + # Each call should trigger the warning only once + self.assertEqual(len(_warnings), len(deprecated_apis_to_args)) + for w in _warnings: + self.assertIn( + "is deprecated and will be removed in a future release", + str(w.message), + ) + common_utils.instantiate_parametrized_tests(TestQuantFlow) diff --git a/torchao/quantization/quant_api.py b/torchao/quantization/quant_api.py index dffd299a5a..ee18de6728 100644 --- a/torchao/quantization/quant_api.py +++ b/torchao/quantization/quant_api.py @@ -92,6 +92,7 @@ to_weight_tensor_with_linear_activation_quantization_metadata, ) from torchao.utils import ( + _ConfigDeprecationWrapper, _is_fbgemm_genai_gpu_available, is_MI300, is_sm_at_least_89, @@ -639,7 +640,9 @@ def __post_init__(self): # for BC -int8_dynamic_activation_int4_weight = Int8DynamicActivationInt4WeightConfig +int8_dynamic_activation_int4_weight = _ConfigDeprecationWrapper( + "int8_dynamic_activation_int4_weight", Int8DynamicActivationInt4WeightConfig +) @register_quantize_module_handler(Int8DynamicActivationInt4WeightConfig) @@ -1018,7 +1021,9 @@ def __post_init__(self): # for BC 
-gemlite_uintx_weight_only = GemliteUIntXWeightOnlyConfig +gemlite_uintx_weight_only = _ConfigDeprecationWrapper( + "gemlite_uintx_weight_only", GemliteUIntXWeightOnlyConfig +) @register_quantize_module_handler(GemliteUIntXWeightOnlyConfig) @@ -1100,7 +1105,7 @@ def __post_init__(self): # for BC # TODO maybe change other callsites -int4_weight_only = Int4WeightOnlyConfig +int4_weight_only = _ConfigDeprecationWrapper("int4_weight_only", Int4WeightOnlyConfig) def _int4_weight_only_quantize_tensor(weight, config): @@ -1310,7 +1315,7 @@ def __post_init__(self): # for BC -int8_weight_only = Int8WeightOnlyConfig +int8_weight_only = _ConfigDeprecationWrapper("int8_weight_only", Int8WeightOnlyConfig) def _int8_weight_only_quantize_tensor(weight, config): @@ -1471,7 +1476,9 @@ def __post_init__(self): # for BC -int8_dynamic_activation_int8_weight = Int8DynamicActivationInt8WeightConfig +int8_dynamic_activation_int8_weight = _ConfigDeprecationWrapper( + "int8_dynamic_activation_int8_weight", Int8DynamicActivationInt8WeightConfig +) def _int8_dynamic_activation_int8_weight_quantize_tensor(weight, config): @@ -1580,7 +1587,9 @@ def __post_init__(self): # for BC -float8_weight_only = Float8WeightOnlyConfig +float8_weight_only = _ConfigDeprecationWrapper( + "float8_weight_only", Float8WeightOnlyConfig +) def _float8_weight_only_quant_tensor(weight, config): @@ -1994,7 +2003,9 @@ def __post_init__(self): # for BC -uintx_weight_only = UIntXWeightOnlyConfig +uintx_weight_only = _ConfigDeprecationWrapper( + "uintx_weight_only", UIntXWeightOnlyConfig +) @register_quantize_module_handler(UIntXWeightOnlyConfig) @@ -2234,7 +2245,7 @@ def __post_init__(self): # for BC -fpx_weight_only = FPXWeightOnlyConfig +fpx_weight_only = _ConfigDeprecationWrapper("fpx_weight_only", FPXWeightOnlyConfig) @register_quantize_module_handler(FPXWeightOnlyConfig) diff --git a/torchao/utils.py b/torchao/utils.py index 652e7f33f1..30d8d7b412 100644 --- a/torchao/utils.py +++ b/torchao/utils.py @@ -12,7 +12,7 
@@ from functools import reduce from importlib.metadata import version from math import gcd -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, Type import torch import torch.nn.utils.parametrize as parametrize @@ -432,6 +432,25 @@ def __eq__(self, other): TORCH_VERSION_AFTER_2_2 = _deprecated_torch_version_after("2.2.0.dev") +class _ConfigDeprecationWrapper: + """ + A deprecation wrapper that directs users from a deprecated "config function" + (e.g. `int4_weight_only`) to the replacement config class. + """ + + def __init__(self, deprecated_name: str, config_cls: Type): + self.deprecated_name = deprecated_name + self.config_cls = config_cls + + def __call__(self, *args, **kwargs): + warnings.warn( + f"`{self.deprecated_name}` is deprecated and will be removed in a future release. " + f"Please use `{self.config_cls.__name__}` instead. Example usage:\n" + f" quantize_(model, {self.config_cls.__name__}(...))" + ) + return self.config_cls(*args, **kwargs) + + """ Helper function for implementing aten op or torch function dispatch and dispatching to these implementations. From 9a78ca39318f6acabb74d08512a66822dce11797 Mon Sep 17 00:00:00 2001 From: andrewor14 Date: Fri, 12 Sep 2025 13:02:11 -0700 Subject: [PATCH 2/3] Update on "Deprecate config functions like `int4_weight_only`" **Summary:** These have been superseded by `AOBaseConfig` objects for several releases already, but we never deprecated them. We will keep them around for another release before breaking BC and removing them. 
**Test Plan:** ``` python test/quantization/test_quant_api.py -k test_config_deprecation ``` [ghstack-poisoned] --- test/quantization/test_quant_api.py | 3 +++ torchao/quantization/quant_api.py | 12 +++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/test/quantization/test_quant_api.py b/test/quantization/test_quant_api.py index f8f339db42..ec843d8438 100644 --- a/test/quantization/test_quant_api.py +++ b/test/quantization/test_quant_api.py @@ -773,9 +773,12 @@ def test_config_deprecation(self): # Map from deprecated API to the args needed to instantiate it deprecated_apis_to_args = { + float8_dynamic_activation_float8_weight: (), + float8_static_activation_float8_weight: (torch.randn(3),), float8_weight_only: (), fpx_weight_only: (3, 2), gemlite_uintx_weight_only: (), + int4_dynamic_activation_int4_weight: (), int4_weight_only: (), int8_dynamic_activation_int4_weight: (), int8_dynamic_activation_int8_weight: (), diff --git a/torchao/quantization/quant_api.py b/torchao/quantization/quant_api.py index ee18de6728..a59f9c7069 100644 --- a/torchao/quantization/quant_api.py +++ b/torchao/quantization/quant_api.py @@ -960,7 +960,9 @@ def __post_init__(self): # for bc -int4_dynamic_activation_int4_weight = Int4DynamicActivationInt4WeightConfig +int4_dynamic_activation_int4_weight = _ConfigDeprecationWrapper( + "int4_dynamic_activation_int4_weight", Int4DynamicActivationInt4WeightConfig +) @register_quantize_module_handler(Int4DynamicActivationInt4WeightConfig) @@ -1747,7 +1749,9 @@ def __post_init__(self): # for bc -float8_dynamic_activation_float8_weight = Float8DynamicActivationFloat8WeightConfig +float8_dynamic_activation_float8_weight = _ConfigDeprecationWrapper( + "float8_dynamic_activation_float8_weight", Float8DynamicActivationFloat8WeightConfig +) def _float8_dynamic_activation_float8_weight_quantize_tensor(weight, config): @@ -1920,7 +1924,9 @@ def __post_init__(self): # for bc -float8_static_activation_float8_weight = 
Float8StaticActivationFloat8WeightConfig +float8_static_activation_float8_weight = _ConfigDeprecationWrapper( + "float8_static_activation_float8_weight", Float8StaticActivationFloat8WeightConfig +) @register_quantize_module_handler(Float8StaticActivationFloat8WeightConfig) From 0c851731f7244a5c9138d5d84dab4ddcd5efc692 Mon Sep 17 00:00:00 2001 From: andrewor14 Date: Fri, 12 Sep 2025 13:27:12 -0700 Subject: [PATCH 3/3] Update on "Deprecate config functions like `int4_weight_only`" **Summary:** These have been superseded by `AOBaseConfig` objects for several releases already, but we never deprecated them. We will keep them around for another release before breaking BC and removing them. **Test Plan:** ``` python test/quantization/test_quant_api.py -k test_config_deprecation ``` [ghstack-poisoned] --- test/quantization/test_quant_api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/quantization/test_quant_api.py b/test/quantization/test_quant_api.py index ec843d8438..cfa29154ab 100644 --- a/test/quantization/test_quant_api.py +++ b/test/quantization/test_quant_api.py @@ -758,9 +758,12 @@ def test_config_deprecation(self): Test that old config functions like `int4_weight_only` trigger deprecation warnings. """ from torchao.quantization import ( + float8_dynamic_activation_float8_weight, + float8_static_activation_float8_weight, float8_weight_only, fpx_weight_only, gemlite_uintx_weight_only, + int4_dynamic_activation_int4_weight, int4_weight_only, int8_dynamic_activation_int4_weight, int8_dynamic_activation_int8_weight,