Commit 274ed29
committed
Improve QAT fp8-int4 numerics
**Summary:** This commit improves the prepare vs. convert SQNR
of fp8-int4 QAT from 12 to 22. It does so by mimicking the
numerics of the target FBGEMM fp8-int4 kernel more closely.
In particular, FBGEMM first quantizes the weights to fp8 and
then uses the max abs values to compute the scale, which differs
significantly from what torchao's quant primitives do.
**Test Plan:**
```
python test/quantization/test_qat.py -k test_fbgemm_fp8_primitives
python test/quantization/test_qat.py -k test_fbgemm_int4_primitives
python test/quantization/test_qat.py -k test_quantize_api_fp8_int4
```
1 parent 4700fe8 commit 274ed29
File tree
4 files changed
+243
-26
lines changed
- test/quantization
- torchao/quantization
- qat
4 files changed
+243
-26
lines changed
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
49 | 49 | | |
50 | 50 | | |
51 | 51 | | |
| 52 | + | |
52 | 53 | | |
53 | 54 | | |
54 | 55 | | |
| |||
1929 | 1930 | | |
1930 | 1931 | | |
1931 | 1932 | | |
1932 | | - | |
| 1933 | + | |
1933 | 1934 | | |
1934 | 1935 | | |
1935 | 1936 | | |
| |||
1950 | 1951 | | |
1951 | 1952 | | |
1952 | 1953 | | |
| 1954 | + | |
| 1955 | + | |
| 1956 | + | |
| 1957 | + | |
| 1958 | + | |
| 1959 | + | |
| 1960 | + | |
| 1961 | + | |
| 1962 | + | |
| 1963 | + | |
| 1964 | + | |
| 1965 | + | |
| 1966 | + | |
1953 | 1967 | | |
1954 | 1968 | | |
1955 | 1969 | | |
| |||
1964 | 1978 | | |
1965 | 1979 | | |
1966 | 1980 | | |
1967 | | - | |
1968 | | - | |
| 1981 | + | |
1969 | 1982 | | |
1970 | | - | |
| 1983 | + | |
1971 | 1984 | | |
1972 | 1985 | | |
1973 | 1986 | | |
| |||
2033 | 2046 | | |
2034 | 2047 | | |
2035 | 2048 | | |
| 2049 | + | |
| 2050 | + | |
| 2051 | + | |
| 2052 | + | |
| 2053 | + | |
| 2054 | + | |
| 2055 | + | |
| 2056 | + | |
| 2057 | + | |
| 2058 | + | |
| 2059 | + | |
| 2060 | + | |
| 2061 | + | |
| 2062 | + | |
| 2063 | + | |
| 2064 | + | |
| 2065 | + | |
| 2066 | + | |
| 2067 | + | |
| 2068 | + | |
| 2069 | + | |
| 2070 | + | |
| 2071 | + | |
| 2072 | + | |
| 2073 | + | |
| 2074 | + | |
| 2075 | + | |
| 2076 | + | |
| 2077 | + | |
| 2078 | + | |
| 2079 | + | |
| 2080 | + | |
| 2081 | + | |
| 2082 | + | |
| 2083 | + | |
| 2084 | + | |
| 2085 | + | |
| 2086 | + | |
| 2087 | + | |
| 2088 | + | |
| 2089 | + | |
| 2090 | + | |
| 2091 | + | |
| 2092 | + | |
| 2093 | + | |
| 2094 | + | |
| 2095 | + | |
| 2096 | + | |
| 2097 | + | |
| 2098 | + | |
| 2099 | + | |
| 2100 | + | |
| 2101 | + | |
| 2102 | + | |
| 2103 | + | |
| 2104 | + | |
| 2105 | + | |
| 2106 | + | |
| 2107 | + | |
| 2108 | + | |
| 2109 | + | |
| 2110 | + | |
| 2111 | + | |
| 2112 | + | |
| 2113 | + | |
| 2114 | + | |
| 2115 | + | |
| 2116 | + | |
| 2117 | + | |
| 2118 | + | |
| 2119 | + | |
| 2120 | + | |
| 2121 | + | |
| 2122 | + | |
| 2123 | + | |
| 2124 | + | |
| 2125 | + | |
| 2126 | + | |
| 2127 | + | |
| 2128 | + | |
| 2129 | + | |
| 2130 | + | |
| 2131 | + | |
| 2132 | + | |
| 2133 | + | |
| 2134 | + | |
| 2135 | + | |
| 2136 | + | |
| 2137 | + | |
| 2138 | + | |
| 2139 | + | |
| 2140 | + | |
| 2141 | + | |
| 2142 | + | |
| 2143 | + | |
| 2144 | + | |
| 2145 | + | |
| 2146 | + | |
| 2147 | + | |
| 2148 | + | |
| 2149 | + | |
| 2150 | + | |
| 2151 | + | |
| 2152 | + | |
| 2153 | + | |
| 2154 | + | |
| 2155 | + | |
| 2156 | + | |
| 2157 | + | |
| 2158 | + | |
| 2159 | + | |
| 2160 | + | |
| 2161 | + | |
| 2162 | + | |
| 2163 | + | |
| 2164 | + | |
| 2165 | + | |
| 2166 | + | |
| 2167 | + | |
| 2168 | + | |
| 2169 | + | |
| 2170 | + | |
2036 | 2171 | | |
2037 | 2172 | | |
2038 | 2173 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
77 | 77 | | |
78 | 78 | | |
79 | 79 | | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
| 91 | + | |
| 92 | + | |
| 93 | + | |
| 94 | + | |
| 95 | + | |
| 96 | + | |
| 97 | + | |
80 | 98 | | |
81 | 99 | | |
82 | 100 | | |
| |||
404 | 422 | | |
405 | 423 | | |
406 | 424 | | |
407 | | - | |
408 | | - | |
| 425 | + | |
409 | 426 | | |
410 | | - | |
| 427 | + | |
411 | 428 | | |
412 | 429 | | |
413 | 430 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
4 | 4 | | |
5 | 5 | | |
6 | 6 | | |
7 | | - | |
| 7 | + | |
8 | 8 | | |
9 | 9 | | |
10 | 10 | | |
11 | 11 | | |
12 | 12 | | |
13 | 13 | | |
| 14 | + | |
14 | 15 | | |
15 | 16 | | |
16 | 17 | | |
| |||
20 | 21 | | |
21 | 22 | | |
22 | 23 | | |
| 24 | + | |
23 | 25 | | |
24 | 26 | | |
25 | 27 | | |
| |||
33 | 35 | | |
34 | 36 | | |
35 | 37 | | |
| 38 | + | |
36 | 39 | | |
37 | 40 | | |
38 | 41 | | |
| |||
65 | 68 | | |
66 | 69 | | |
67 | 70 | | |
| 71 | + | |
| 72 | + | |
68 | 73 | | |
69 | 74 | | |
70 | 75 | | |
| |||
93 | 98 | | |
94 | 99 | | |
95 | 100 | | |
96 | | - | |
97 | | - | |
98 | | - | |
| 101 | + | |
99 | 102 | | |
100 | 103 | | |
101 | 104 | | |
102 | 105 | | |
| 106 | + | |
| 107 | + | |
| 108 | + | |
| 109 | + | |
| 110 | + | |
| 111 | + | |
| 112 | + | |
| 113 | + | |
| 114 | + | |
| 115 | + | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
| 122 | + | |
| 123 | + | |
| 124 | + | |
| 125 | + | |
| 126 | + | |
| 127 | + | |
| 128 | + | |
| 129 | + | |
| 130 | + | |
| 131 | + | |
| 132 | + | |
| 133 | + | |
| 134 | + | |
| 135 | + | |
| 136 | + | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
| 162 | + | |
| 163 | + | |
| 164 | + | |
| 165 | + | |
| 166 | + | |
| 167 | + | |
103 | 168 | | |
104 | 169 | | |
105 | 170 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
219 | 219 | | |
220 | 220 | | |
221 | 221 | | |
| 222 | + | |
| 223 | + | |
| 224 | + | |
| 225 | + | |
| 226 | + | |
| 227 | + | |
| 228 | + | |
| 229 | + | |
| 230 | + | |
| 231 | + | |
| 232 | + | |
| 233 | + | |
| 234 | + | |
| 235 | + | |
222 | 236 | | |
223 | 237 | | |
224 | 238 | | |
| |||
2275 | 2289 | | |
2276 | 2290 | | |
2277 | 2291 | | |
2278 | | - | |
2279 | 2292 | | |
2280 | 2293 | | |
2281 | 2294 | | |
| |||
2288 | 2301 | | |
2289 | 2302 | | |
2290 | 2303 | | |
2291 | | - | |
2292 | | - | |
2293 | | - | |
| 2304 | + | |
2294 | 2305 | | |
2295 | 2306 | | |
2296 | | - | |
2297 | | - | |
2298 | 2307 | | |
2299 | 2308 | | |
2300 | 2309 | | |
| |||
2310 | 2319 | | |
2311 | 2320 | | |
2312 | 2321 | | |
2313 | | - | |
2314 | | - | |
2315 | | - | |
2316 | | - | |
2317 | | - | |
2318 | | - | |
2319 | | - | |
2320 | | - | |
2321 | | - | |
0 commit comments