From e14907f5005e8a9f28bcd460c4acfb45bd281c23 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 4 Nov 2024 09:13:26 +0000 Subject: [PATCH] [llvm-exegesis] Use older instructions to load lower vregs This patch makes X86 llvm-exegesis unconditionally use older instructions to load the lower vector registers, rather than trying to use AVX512 for everything when available. This fixes a case where we would try to load AVX512 registers using the older instructions if such a snippet was constructed while -mcpu was set to something that did not support AVX512. This would lead to a machine code verification error rather than resulting in incomplete snippet setup, which seems to be the intention of how this should work. Fixes #114691. --- llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 12 ++++-- .../tools/llvm-exegesis/X86/TargetTest.cpp | 40 +++++++++++++++++-- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 4709dede5b2e2..0a70321fab781 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -1054,18 +1054,22 @@ std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI, ConstantInliner CI(Value); if (X86::VR64RegClass.contains(Reg)) return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm); + if (X86::VR128RegClass.contains(Reg)) { + if (STI.getFeatureBits()[X86::FeatureAVX]) + return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm); + return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm); + } if (X86::VR128XRegClass.contains(Reg)) { if (STI.getFeatureBits()[X86::FeatureAVX512]) return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm); + } + if (X86::VR256RegClass.contains(Reg)) { if (STI.getFeatureBits()[X86::FeatureAVX]) - return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm); - return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm); + return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm); } if
(X86::VR256XRegClass.contains(Reg)) { if (STI.getFeatureBits()[X86::FeatureAVX512]) return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm); - if (STI.getFeatureBits()[X86::FeatureAVX]) - return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm); } if (X86::VR512RegClass.contains(Reg)) if (STI.getFeatureBits()[X86::FeatureAVX512]) diff --git a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp index 3a028bad486cc..921d7d7975f6a 100644 --- a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp @@ -242,7 +242,20 @@ TEST_F(X86Core2AvxTargetTest, SetRegToVR128Value_Use_VMOVDQUrm) { IsStackDeallocate(16))); } -TEST_F(X86Core2Avx512TargetTest, SetRegToVR128Value_Use_VMOVDQU32Z128rm) { +TEST_F(X86Core2Avx512TargetTest, + SetRegToVR128ValueHighXMM_Use_VMOVDQU32Z128rm) { + EXPECT_THAT( + setRegTo(X86::XMM16, APInt(128, "11112222333344445555666677778888", 16)), + ElementsAre(IsStackAllocate(16), + IsMovValueToStack(X86::MOV32mi, 0x77778888UL, 0), + IsMovValueToStack(X86::MOV32mi, 0x55556666UL, 4), + IsMovValueToStack(X86::MOV32mi, 0x33334444UL, 8), + IsMovValueToStack(X86::MOV32mi, 0x11112222UL, 12), + IsMovValueFromStack(X86::VMOVDQU32Z128rm, X86::XMM16), + IsStackDeallocate(16))); +} + +TEST_F(X86Core2Avx512TargetTest, SetRegToVR128ValueLowXMM_Use_VMOVDQUrm) { EXPECT_THAT( setRegTo(X86::XMM0, APInt(128, "11112222333344445555666677778888", 16)), ElementsAre(IsStackAllocate(16), @@ -250,7 +263,7 @@ TEST_F(X86Core2Avx512TargetTest, SetRegToVR128Value_Use_VMOVDQU32Z128rm) { IsMovValueToStack(X86::MOV32mi, 0x55556666UL, 4), IsMovValueToStack(X86::MOV32mi, 0x33334444UL, 8), IsMovValueToStack(X86::MOV32mi, 0x11112222UL, 12), - IsMovValueFromStack(X86::VMOVDQU32Z128rm, X86::XMM0), + IsMovValueFromStack(X86::VMOVDQUrm, X86::XMM0), IsStackDeallocate(16))); } @@ -272,7 +285,26 @@ TEST_F(X86Core2AvxTargetTest, SetRegToVR256Value_Use_VMOVDQUYrm) { 
IsStackDeallocate(32)})); } -TEST_F(X86Core2Avx512TargetTest, SetRegToVR256Value_Use_VMOVDQU32Z256rm) { +TEST_F(X86Core2Avx512TargetTest, + SetRegToVR256ValueHighYMM_Use_VMOVDQU32Z256rm) { + const char ValueStr[] = + "1111111122222222333333334444444455555555666666667777777788888888"; + EXPECT_THAT( + setRegTo(X86::YMM16, APInt(256, ValueStr, 16)), + ElementsAreArray({IsStackAllocate(32), + IsMovValueToStack(X86::MOV32mi, 0x88888888UL, 0), + IsMovValueToStack(X86::MOV32mi, 0x77777777UL, 4), + IsMovValueToStack(X86::MOV32mi, 0x66666666UL, 8), + IsMovValueToStack(X86::MOV32mi, 0x55555555UL, 12), + IsMovValueToStack(X86::MOV32mi, 0x44444444UL, 16), + IsMovValueToStack(X86::MOV32mi, 0x33333333UL, 20), + IsMovValueToStack(X86::MOV32mi, 0x22222222UL, 24), + IsMovValueToStack(X86::MOV32mi, 0x11111111UL, 28), + IsMovValueFromStack(X86::VMOVDQU32Z256rm, X86::YMM16), + IsStackDeallocate(32)})); +} + +TEST_F(X86Core2Avx512TargetTest, SetRegToVR256ValueLowYMM_Use_VMOVDQUYrm) { const char ValueStr[] = "1111111122222222333333334444444455555555666666667777777788888888"; EXPECT_THAT( @@ -286,7 +318,7 @@ TEST_F(X86Core2Avx512TargetTest, SetRegToVR256Value_Use_VMOVDQU32Z256rm) { IsMovValueToStack(X86::MOV32mi, 0x33333333UL, 20), IsMovValueToStack(X86::MOV32mi, 0x22222222UL, 24), IsMovValueToStack(X86::MOV32mi, 0x11111111UL, 28), - IsMovValueFromStack(X86::VMOVDQU32Z256rm, X86::YMM0), + IsMovValueFromStack(X86::VMOVDQUYrm, X86::YMM0), IsStackDeallocate(32)})); }