We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 821bd2b, commit 5bd4e3b (copy full SHA for 5bd4e3b)
torchao/testing/training/roofline_utils.py
@@ -207,6 +207,7 @@ def get_tensor_memory_traffic_ovhd_s(
207
# across dim0 and dim1. input and grad_output still 1x32.
208
209
if tensor_role in ("input", "grad_output"):
210
+ # TODO(future): update all of the mx rooflines to just read once
211
# kernel 1: x_bf16 -> x_mxfp8_dim0
212
# kernel 2: x_bf16 -> x_mxfp8_dim1
213
if fuse_with_prev:
0 commit comments