Save shape when an fp8 solution is not found (#123)

charlifu · gshtras · web-flow · commit 8608888a8519 · 2024-08-08T13:34:12.000-04:00
Co-authored-by: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
diff --git a/vllm/model_executor/layers/quantization/fp8_rocm.py b/vllm/model_executor/layers/quantization/fp8_rocm.py
@@ -242,6 +242,8 @@ def apply(
         k = x.shape[1]
 
         solidx = self._config._tuned.get((m, n, k), 0)
+        if solidx == 0:
+            self._config.save_shape(m, n, k)
         res = ops.fp8_mm(x_quant, weight.t(), out_dtype, asf, wsf, osf,
                          int(solidx))