Skip to content

Commit 0742090

Browse files
[llvm-exegesis] Use older instructions to load lower vregs (#114768)
This patch makes X86 llvm-exegesis unconditionally use the older (SSE/AVX) instructions to load the lower vector registers, rather than trying to use AVX512 for everything when it is available. This fixes a case where we would try to load AVX512 registers using the older instructions if such a snippet was constructed while -mcpu was set to something that did not support AVX512. That led to a machine code verification error rather than an incomplete snippet setup, which seems to be the intended behavior. Fixes #114691.
1 parent 7c69491 commit 0742090

File tree

2 files changed

+44
-8
lines changed

2 files changed

+44
-8
lines changed

llvm/tools/llvm-exegesis/lib/X86/Target.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,18 +1054,22 @@ std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
10541054
ConstantInliner CI(Value);
10551055
if (X86::VR64RegClass.contains(Reg))
10561056
return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
1057+
if (X86::VR128RegClass.contains(Reg)) {
1058+
if (STI.getFeatureBits()[X86::FeatureAVX])
1059+
return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
1060+
return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
1061+
}
10571062
if (X86::VR128XRegClass.contains(Reg)) {
10581063
if (STI.getFeatureBits()[X86::FeatureAVX512])
10591064
return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm);
1065+
}
1066+
if (X86::VR256RegClass.contains(Reg)) {
10601067
if (STI.getFeatureBits()[X86::FeatureAVX])
1061-
return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
1062-
return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
1068+
return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
10631069
}
10641070
if (X86::VR256XRegClass.contains(Reg)) {
10651071
if (STI.getFeatureBits()[X86::FeatureAVX512])
10661072
return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm);
1067-
if (STI.getFeatureBits()[X86::FeatureAVX])
1068-
return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
10691073
}
10701074
if (X86::VR512RegClass.contains(Reg))
10711075
if (STI.getFeatureBits()[X86::FeatureAVX512])

llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -242,15 +242,28 @@ TEST_F(X86Core2AvxTargetTest, SetRegToVR128Value_Use_VMOVDQUrm) {
242242
IsStackDeallocate(16)));
243243
}
244244

245-
TEST_F(X86Core2Avx512TargetTest, SetRegToVR128Value_Use_VMOVDQU32Z128rm) {
245+
TEST_F(X86Core2Avx512TargetTest,
246+
SetRegToVR128ValueHighXMM_Use_VMOVDQU32Z128rm) {
247+
EXPECT_THAT(
248+
setRegTo(X86::XMM16, APInt(128, "11112222333344445555666677778888", 16)),
249+
ElementsAre(IsStackAllocate(16),
250+
IsMovValueToStack(X86::MOV32mi, 0x77778888UL, 0),
251+
IsMovValueToStack(X86::MOV32mi, 0x55556666UL, 4),
252+
IsMovValueToStack(X86::MOV32mi, 0x33334444UL, 8),
253+
IsMovValueToStack(X86::MOV32mi, 0x11112222UL, 12),
254+
IsMovValueFromStack(X86::VMOVDQU32Z128rm, X86::XMM16),
255+
IsStackDeallocate(16)));
256+
}
257+
258+
TEST_F(X86Core2Avx512TargetTest, SetRegToVR128ValueLowXMM_Use_VMOVDQUrm) {
246259
EXPECT_THAT(
247260
setRegTo(X86::XMM0, APInt(128, "11112222333344445555666677778888", 16)),
248261
ElementsAre(IsStackAllocate(16),
249262
IsMovValueToStack(X86::MOV32mi, 0x77778888UL, 0),
250263
IsMovValueToStack(X86::MOV32mi, 0x55556666UL, 4),
251264
IsMovValueToStack(X86::MOV32mi, 0x33334444UL, 8),
252265
IsMovValueToStack(X86::MOV32mi, 0x11112222UL, 12),
253-
IsMovValueFromStack(X86::VMOVDQU32Z128rm, X86::XMM0),
266+
IsMovValueFromStack(X86::VMOVDQUrm, X86::XMM0),
254267
IsStackDeallocate(16)));
255268
}
256269

@@ -272,7 +285,26 @@ TEST_F(X86Core2AvxTargetTest, SetRegToVR256Value_Use_VMOVDQUYrm) {
272285
IsStackDeallocate(32)}));
273286
}
274287

275-
TEST_F(X86Core2Avx512TargetTest, SetRegToVR256Value_Use_VMOVDQU32Z256rm) {
288+
TEST_F(X86Core2Avx512TargetTest,
289+
SetRegToVR256ValueHighYMM_Use_VMOVDQU32Z256rm) {
290+
const char ValueStr[] =
291+
"1111111122222222333333334444444455555555666666667777777788888888";
292+
EXPECT_THAT(
293+
setRegTo(X86::YMM16, APInt(256, ValueStr, 16)),
294+
ElementsAreArray({IsStackAllocate(32),
295+
IsMovValueToStack(X86::MOV32mi, 0x88888888UL, 0),
296+
IsMovValueToStack(X86::MOV32mi, 0x77777777UL, 4),
297+
IsMovValueToStack(X86::MOV32mi, 0x66666666UL, 8),
298+
IsMovValueToStack(X86::MOV32mi, 0x55555555UL, 12),
299+
IsMovValueToStack(X86::MOV32mi, 0x44444444UL, 16),
300+
IsMovValueToStack(X86::MOV32mi, 0x33333333UL, 20),
301+
IsMovValueToStack(X86::MOV32mi, 0x22222222UL, 24),
302+
IsMovValueToStack(X86::MOV32mi, 0x11111111UL, 28),
303+
IsMovValueFromStack(X86::VMOVDQU32Z256rm, X86::YMM16),
304+
IsStackDeallocate(32)}));
305+
}
306+
307+
TEST_F(X86Core2Avx512TargetTest, SetRegToVR256ValueLowYMM_Use_VMOVDQUYrm) {
276308
const char ValueStr[] =
277309
"1111111122222222333333334444444455555555666666667777777788888888";
278310
EXPECT_THAT(
@@ -286,7 +318,7 @@ TEST_F(X86Core2Avx512TargetTest, SetRegToVR256Value_Use_VMOVDQU32Z256rm) {
286318
IsMovValueToStack(X86::MOV32mi, 0x33333333UL, 20),
287319
IsMovValueToStack(X86::MOV32mi, 0x22222222UL, 24),
288320
IsMovValueToStack(X86::MOV32mi, 0x11111111UL, 28),
289-
IsMovValueFromStack(X86::VMOVDQU32Z256rm, X86::YMM0),
321+
IsMovValueFromStack(X86::VMOVDQUYrm, X86::YMM0),
290322
IsStackDeallocate(32)}));
291323
}
292324

0 commit comments

Comments
 (0)