Skip to content

Commit 0c9f6ad

Browse files
committed
[X86] Correct the scheduling information for AVX-VNNI and AVX512-VNNI instructions.
The AVXVNNI load instructions weren't using the Folded load write class and they had no ReadAdvance. The YMM versions were using the XMM schedule class. The AVX512VNNI instructions had the right classes, but not enough ReadAdvances to account for the 2 sources. Noticed while investigating llvm#62026. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D147872
1 parent af27138 commit 0c9f6ad

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12577,15 +12577,17 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
1257712577
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
1257812578
(VTI.VT (VTI.LdFrag addr:$src3))))>,
1257912579
EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12580-
Sched<[sched.Folded, sched.ReadAfterFold]>;
12580+
Sched<[sched.Folded, sched.ReadAfterFold,
12581+
sched.ReadAfterFold]>;
1258112582
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
1258212583
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
1258312584
OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
1258412585
"$src2, ${src3}"#VTI.BroadcastStr,
1258512586
(OpNode VTI.RC:$src1, VTI.RC:$src2,
1258612587
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
1258712588
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12588-
T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12589+
T8PD, Sched<[sched.Folded, sched.ReadAfterFold,
12590+
sched.ReadAfterFold]>;
1258912591
}
1259012592
}
1259112593

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7332,22 +7332,26 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
73327332
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
73337333
[(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
73347334
(loadv4i32 addr:$src3))))]>,
7335-
VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
7335+
VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
7336+
SchedWriteVecIMul.XMM.ReadAfterFold,
7337+
SchedWriteVecIMul.XMM.ReadAfterFold]>;
73367338

73377339
let isCommutable = IsCommutable in
73387340
def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
73397341
(ins VR256:$src1, VR256:$src2, VR256:$src3),
73407342
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
73417343
[(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
73427344
VR256:$src2, VR256:$src3)))]>,
7343-
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
7345+
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
73447346

73457347
def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
73467348
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
73477349
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
73487350
[(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
73497351
(loadv8i32 addr:$src3))))]>,
7350-
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
7352+
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
7353+
SchedWriteVecIMul.YMM.ReadAfterFold,
7354+
SchedWriteVecIMul.YMM.ReadAfterFold]>;
73517355
}
73527356

73537357
defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;

0 commit comments

Comments
 (0)