Skip to content

Commit e6b16a3

Browse files
committed
Update
[ghstack-poisoned]
2 parents 31e3c5b + cafe668 commit e6b16a3

File tree

2 files changed

+1
-2
lines changed

2 files changed

+1
-2
lines changed

torchao/_models/llama/eval.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ def run_evaluation(
     if quantization == "float8_a1x128_w128x128":
         config = Float8DynamicActivationFloat8WeightConfig(
             granularity=(PerBlock([1, 128]), PerBlock([128, 128])),
+            activation_value_lb=1e-12,
         )
     # TODO(future): all workflows in this file should be skipping quantization
     # of `lm_head`

torchao/quantization/quant_api.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1778,8 +1778,6 @@ def __post_init__(self):

         default_use_fast_accum = True
         if _granularity_is_a_1_128_w_128_128(self.granularity):
-            assert self.activation_value_lb is None, "unimplemented"
-            assert self.activation_value_ub is None, "unimplemented"
             assert self.kernel_preference in (
                 KernelPreference.AUTO,
                 KernelPreference.TORCH,

0 commit comments

Comments
 (0)