
Commit ae204cc

Remove FbgemmConfig and remaining Fbgemm tensors (#3032)
Summary: FbgemmConfig was previously used for prototyping and is no longer used; fbgemm kernels are now exposed through Int4WeightOnlyConfig (for int4) and Float8DynamicActivationFloat8WeightConfig (for FP8). This is not considered BC-breaking since the API has not been publicized yet.

Test Plan: CI

Reviewers:

Subscribers:

Tasks:

Tags:
1 parent 18dbe87 commit ae204cc

File tree

8 files changed: +1 −383 lines
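As the commit summary notes, fbgemm kernels are now exposed through the existing config classes rather than FbgemmConfig. A minimal usage sketch follows; the constructor defaults, the CUDA device assumption, and any extra options needed to select the fbgemm backend are assumptions of this sketch, not taken from this commit:

import torch
from torchao.quantization import (
    Float8DynamicActivationFloat8WeightConfig,
    Int4WeightOnlyConfig,
    quantize_,
)

# int4 weight-only path (group_size=128 matches the tests touched by this commit)
m_int4 = torch.nn.Sequential(torch.nn.Linear(256, 256, dtype=torch.bfloat16, device="cuda"))
quantize_(m_int4, Int4WeightOnlyConfig(group_size=128))

# FP8 dynamic-activation / FP8-weight path, roughly covering the old
# FbgemmConfig(e4m3, e4m3, bf16) usage; requires a recent GPU (e.g. SM89+)
m_fp8 = torch.nn.Sequential(torch.nn.Linear(256, 256, dtype=torch.bfloat16, device="cuda"))
quantize_(m_fp8, Float8DynamicActivationFloat8WeightConfig())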


docs/source/torchao_vllm_integration.md

Lines changed: 1 addition & 1 deletion
@@ -171,7 +171,7 @@ class MyNewQuantConfig(AOBaseConfig):
     VERSION: ClassVar[int] = 1

 class MyQuantizedTensor(TorchAOBaseTensor):
-    """Example based on FbgemmFp8Tensor - stores quantized data + scale"""
+    """Example based on Float8Tensor - stores quantized data + scale"""

     tensor_data_attrs = ["quantized_data", "scale"]
     tensor_attributes = ["dtype"]
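The doc fragment above only shows pieces of MyQuantizedTensor. A minimal sketch of what such a subclass could look like, following the attribute names shown; the __new__/__init__ plumbing here is an assumption, not code from this commit or the integration guide:

import torch
from torchao.utils import TorchAOBaseTensor


class MyQuantizedTensor(TorchAOBaseTensor):
    """Example based on Float8Tensor - stores quantized data + scale"""

    tensor_data_attrs = ["quantized_data", "scale"]
    tensor_attributes = ["dtype"]

    def __new__(cls, quantized_data, scale, dtype):
        # The wrapper subclass reports the original (high precision) dtype,
        # while the payload lives in quantized_data / scale.
        return torch.Tensor._make_wrapper_subclass(
            cls, quantized_data.shape, dtype=dtype, device=quantized_data.device
        )

    def __init__(self, quantized_data, scale, dtype):
        self.quantized_data = quantized_data
        self.scale = scale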

test/core/test_config.py

Lines changed: 0 additions & 2 deletions
@@ -24,7 +24,6 @@
     AWQStep,
 )
 from torchao.quantization.quant_api import (
-    FbgemmConfig,
     Float8DynamicActivationFloat8WeightConfig,
     Float8DynamicActivationInt4WeightConfig,
     Float8WeightOnlyConfig,
@@ -92,7 +91,6 @@
     ),
     AWQConfig(Int4WeightOnlyConfig(group_size=128), step=AWQStep.PREPARE_FOR_LOADING),
     AWQConfig(Int4WeightOnlyConfig(group_size=128), step="prepare_for_loading"),
-    FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16, [1, 1, 256]),
 ]


test/dtypes/test_affine_quantized.py

Lines changed: 0 additions & 7 deletions
@@ -24,9 +24,7 @@
     to_affine_quantized_intx,
     to_affine_quantized_intx_static,
 )
-from torchao.float8.config import e4m3_dtype
 from torchao.quantization import (
-    FbgemmConfig,
     Float8WeightOnlyConfig,
     GemliteUIntXWeightOnlyConfig,
     Int4DynamicActivationInt4WeightConfig,
@@ -44,7 +42,6 @@
     is_fbcode,
     is_ROCM,
     is_sm_at_least_89,
-    is_sm_at_least_90,
 )

 is_cusparselt_available = (
@@ -100,10 +97,6 @@ def get_quantization_functions(
     if is_sm_at_least_89():
         base_functions.append(Float8WeightOnlyConfig())

-    if is_sm_at_least_90():
-        base_functions.append(FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16))
-        base_functions.append(FbgemmConfig(e4m3_dtype, e4m3_dtype, torch.bfloat16))
-
     return base_functions


torchao/_models/llama/generate.py

Lines changed: 0 additions & 19 deletions
@@ -434,25 +434,6 @@ def ffn_or_attn_only(mod, fqn):
             model,
             Int4WeightOnlyConfig(group_size=group_size, use_hqq=use_hqq, version=1),
         )
-    elif "fbgemm" in quantization and "int4" in quantization:
-        from torchao.quantization import FbgemmConfig
-
-        _, precision, group_size = quantization.split("-")
-        group_size = int(group_size)
-        block_size = [1, group_size]
-        assert precision == "int4", f"FbegemmConfig({precision=}) not supported yet"
-        quantize_(
-            model,
-            FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16, block_size),
-        )
-    elif "fbgemm" in quantization and "fp8" in quantization:
-        from torchao.float8.config import e4m3_dtype
-        from torchao.quantization import FbgemmConfig
-
-        quantize_(
-            model,
-            FbgemmConfig(e4m3_dtype, e4m3_dtype, torch.bfloat16),
-        )
     elif "int4dq-" in quantization:
         from torchao.dtypes import CutlassInt4PackedLayout

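The removed branch above parsed a group size out of the old "fbgemm-int4-<group_size>" flag before building a FbgemmConfig. A hypothetical equivalent with the retained config (not part of this commit; kernel availability still depends on hardware) might look like:

import torch
from torchao.quantization import Int4WeightOnlyConfig, quantize_

quantization = "fbgemm-int4-128"  # old-style flag value, shown for illustration
model = torch.nn.Linear(256, 256, dtype=torch.bfloat16, device="cuda")

# split into ("fbgemm", "int4", "128") and reuse the group size
_, _, group_size = quantization.split("-")
quantize_(model, Int4WeightOnlyConfig(group_size=int(group_size)))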

torchao/dtypes/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -8,7 +8,6 @@
     to_affine_quantized_intx,
     to_affine_quantized_intx_static,
 )
-from .fbgemm_fp8_tensor import FbgemmFp8Tensor, to_fbgemm_fp8
 from .floatx import (
     CutlassSemiSparseLayout,
     Float8Layout,

torchao/dtypes/fbgemm_fp8_tensor.py

Lines changed: 0 additions & 268 deletions
This file was deleted.

torchao/quantization/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -43,7 +43,6 @@
 )
 from .quant_api import (
     CutlassInt4PackedLayout,
-    FbgemmConfig,
     Float8DynamicActivationFloat8SemiSparseWeightConfig,
     Float8DynamicActivationFloat8WeightConfig,
     Float8DynamicActivationInt4WeightConfig,
@@ -161,7 +160,6 @@
     "GemliteUIntXWeightOnlyConfig",
     "AOPerModuleConfig",
     "ModuleFqnToConfig",
-    "FbgemmConfig",
     # tensor subclasses
     "Int4Tensor",
     "Int4PlainInt32Tensor",
