-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[llvm-exegesis] Use older instructions to load lower vregs #114768
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[llvm-exegesis] Use older instructions to load lower vregs #114768
Conversation
This patch makes X86 llvm-exegesis unconditionally use older instructions to load the lower vector registers, rather than trying to use AVX512 for everything when available. This fixes a case where we would try to load AVX512 registers using the older instructions if such a snippet was constructed while -mcpu was set to something that did not support AVX512. This would lead to a machine code verification error rather than resulting in incomplete snippet setup, which seems to be the intention of how this should work. Fixes llvm#114691.
@llvm/pr-subscribers-tools-llvm-exegesis Author: Aiden Grossman (boomanaiden154) ChangesThis patch makes X86 llvm-exegesis unconditionally use older instructions to load the lower vector registers, rather than trying to use AVX512 for everything when available. This fixes a case where we would try to load AVX512 registers using the older instructions if such a snippet was constructed while -mcpu was set to something that did not support AVX512. This would lead to a machine code verification error rather than resulting in incomplete snippet setup, which seems to be the intention of how this should work. Fixes #114691. Full diff: https://github.com/llvm/llvm-project/pull/114768.diff 2 Files Affected:
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 4709dede5b2e20..0a70321fab7818 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -1054,18 +1054,22 @@ std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
ConstantInliner CI(Value);
if (X86::VR64RegClass.contains(Reg))
return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
+ if (X86::VR128RegClass.contains(Reg)) {
+ if (STI.getFeatureBits()[X86::FeatureAVX])
+ return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
+ return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
+ }
if (X86::VR128XRegClass.contains(Reg)) {
if (STI.getFeatureBits()[X86::FeatureAVX512])
return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm);
+ }
+ if (X86::VR256RegClass.contains(Reg)) {
if (STI.getFeatureBits()[X86::FeatureAVX])
- return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
- return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
+ return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
}
if (X86::VR256XRegClass.contains(Reg)) {
if (STI.getFeatureBits()[X86::FeatureAVX512])
return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm);
- if (STI.getFeatureBits()[X86::FeatureAVX])
- return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
}
if (X86::VR512RegClass.contains(Reg))
if (STI.getFeatureBits()[X86::FeatureAVX512])
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
index 3a028bad486ccc..921d7d7975f6ae 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
@@ -242,7 +242,20 @@ TEST_F(X86Core2AvxTargetTest, SetRegToVR128Value_Use_VMOVDQUrm) {
IsStackDeallocate(16)));
}
-TEST_F(X86Core2Avx512TargetTest, SetRegToVR128Value_Use_VMOVDQU32Z128rm) {
+TEST_F(X86Core2Avx512TargetTest,
+ SetRegToVR128ValueHighXMM_Use_VMOVDQU32Z128rm) {
+ EXPECT_THAT(
+ setRegTo(X86::XMM16, APInt(128, "11112222333344445555666677778888", 16)),
+ ElementsAre(IsStackAllocate(16),
+ IsMovValueToStack(X86::MOV32mi, 0x77778888UL, 0),
+ IsMovValueToStack(X86::MOV32mi, 0x55556666UL, 4),
+ IsMovValueToStack(X86::MOV32mi, 0x33334444UL, 8),
+ IsMovValueToStack(X86::MOV32mi, 0x11112222UL, 12),
+ IsMovValueFromStack(X86::VMOVDQU32Z128rm, X86::XMM16),
+ IsStackDeallocate(16)));
+}
+
+TEST_F(X86Core2Avx512TargetTest, SetRegToVR128ValueLowXMM_Use_VMOVDQUrm) {
EXPECT_THAT(
setRegTo(X86::XMM0, APInt(128, "11112222333344445555666677778888", 16)),
ElementsAre(IsStackAllocate(16),
@@ -250,7 +263,7 @@ TEST_F(X86Core2Avx512TargetTest, SetRegToVR128Value_Use_VMOVDQU32Z128rm) {
IsMovValueToStack(X86::MOV32mi, 0x55556666UL, 4),
IsMovValueToStack(X86::MOV32mi, 0x33334444UL, 8),
IsMovValueToStack(X86::MOV32mi, 0x11112222UL, 12),
- IsMovValueFromStack(X86::VMOVDQU32Z128rm, X86::XMM0),
+ IsMovValueFromStack(X86::VMOVDQUrm, X86::XMM0),
IsStackDeallocate(16)));
}
@@ -272,7 +285,26 @@ TEST_F(X86Core2AvxTargetTest, SetRegToVR256Value_Use_VMOVDQUYrm) {
IsStackDeallocate(32)}));
}
-TEST_F(X86Core2Avx512TargetTest, SetRegToVR256Value_Use_VMOVDQU32Z256rm) {
+TEST_F(X86Core2Avx512TargetTest,
+ SetRegToVR256ValueHighYMM_Use_VMOVDQU32Z256rm) {
+ const char ValueStr[] =
+ "1111111122222222333333334444444455555555666666667777777788888888";
+ EXPECT_THAT(
+ setRegTo(X86::YMM16, APInt(256, ValueStr, 16)),
+ ElementsAreArray({IsStackAllocate(32),
+ IsMovValueToStack(X86::MOV32mi, 0x88888888UL, 0),
+ IsMovValueToStack(X86::MOV32mi, 0x77777777UL, 4),
+ IsMovValueToStack(X86::MOV32mi, 0x66666666UL, 8),
+ IsMovValueToStack(X86::MOV32mi, 0x55555555UL, 12),
+ IsMovValueToStack(X86::MOV32mi, 0x44444444UL, 16),
+ IsMovValueToStack(X86::MOV32mi, 0x33333333UL, 20),
+ IsMovValueToStack(X86::MOV32mi, 0x22222222UL, 24),
+ IsMovValueToStack(X86::MOV32mi, 0x11111111UL, 28),
+ IsMovValueFromStack(X86::VMOVDQU32Z256rm, X86::YMM16),
+ IsStackDeallocate(32)}));
+}
+
+TEST_F(X86Core2Avx512TargetTest, SetRegToVR256ValueLowYMM_Use_VMOVDQUYrm) {
const char ValueStr[] =
"1111111122222222333333334444444455555555666666667777777788888888";
EXPECT_THAT(
@@ -286,7 +318,7 @@ TEST_F(X86Core2Avx512TargetTest, SetRegToVR256Value_Use_VMOVDQU32Z256rm) {
IsMovValueToStack(X86::MOV32mi, 0x33333333UL, 20),
IsMovValueToStack(X86::MOV32mi, 0x22222222UL, 24),
IsMovValueToStack(X86::MOV32mi, 0x11111111UL, 28),
- IsMovValueFromStack(X86::VMOVDQU32Z256rm, X86::YMM0),
+ IsMovValueFromStack(X86::VMOVDQUYrm, X86::YMM0),
IsStackDeallocate(32)}));
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch makes X86 llvm-exegesis unconditionally use older instructions to load the lower vector registers, rather than trying to use AVX512 for everything when available. This fixes a case where we would try to load AVX512 registers using the older instructions if such a snippet was constructed while -mcpu was set to something that did not support AVX512. This would lead to a machine code verification error rather than resulting in incomplete snippet setup, which seems to be the intention of how this should work. Fixes llvm#114691.
This patch makes X86 llvm-exegesis unconditionally use older instructions to load the lower vector registers, rather than trying to use AVX512 for everything when available. This fixes a case where we would try to load AVX512 registers using the older instructions if such a snippet was constructed while -mcpu was set to something that did not support AVX512. This would lead to a machine code verification error rather than resulting in incomplete snippet setup, which seems to be the intention of how this should work.
Fixes #114691.