diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index bdd86e48fa543..20dbaf797e327 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -619,40 +619,30 @@ void X86DomainReassignment::initConverters() { std::make_unique(From, To); }; - bool HasEGPR = STI->hasEGPR(); - createReplacerDstCOPY(X86::MOVZX32rm16, - HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); - createReplacerDstCOPY(X86::MOVZX64rm16, - HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); +#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC) + createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); - createReplacerDstCOPY(X86::MOVZX32rr16, - HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); - createReplacerDstCOPY(X86::MOVZX64rr16, - HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); + createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); + createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); if (STI->hasDQI()) { - createReplacerDstCOPY(X86::MOVZX16rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacerDstCOPY(X86::MOVZX32rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacerDstCOPY(X86::MOVZX64rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - - createReplacerDstCOPY(X86::MOVZX16rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); - createReplacerDstCOPY(X86::MOVZX32rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); - createReplacerDstCOPY(X86::MOVZX64rr8, - HasEGPR ? 
X86::KMOVBkk_EVEX : X86::KMOVBkk); + createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + + createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); } auto createReplacer = [&](unsigned From, unsigned To) { Converters[{MaskDomain, From}] = std::make_unique(From, To); }; - createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); - createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk); - createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); + createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk)); + createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); createReplacer(X86::SHR16ri, X86::KSHIFTRWri); createReplacer(X86::SHL16ri, X86::KSHIFTLWri); createReplacer(X86::NOT16r, X86::KNOTWrr); @@ -661,14 +651,14 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::XOR16rr, X86::KXORWrr); if (STI->hasBWI()) { - createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm); - createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm); + createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm)); + createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm)); - createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk); - createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk); + createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk)); + createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk)); - createReplacer(X86::MOV32rr, HasEGPR ? 
X86::KMOVDkk_EVEX : X86::KMOVDkk); - createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk); + createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk)); + createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk)); createReplacer(X86::SHR32ri, X86::KSHIFTRDri); createReplacer(X86::SHR64ri, X86::KSHIFTRQri); @@ -696,8 +686,8 @@ void X86DomainReassignment::initConverters() { // TODO: KTEST is not a replacement for TEST due to flag differences. Need // to prove only Z flag is used. - //createReplacer(X86::TEST32rr, X86::KTESTDrr); - //createReplacer(X86::TEST64rr, X86::KTESTQrr); + // createReplacer(X86::TEST32rr, X86::KTESTDrr); + // createReplacer(X86::TEST64rr, X86::KTESTQrr); } if (STI->hasDQI()) { @@ -706,9 +696,9 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::AND8rr, X86::KANDBrr); - createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk); - createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); + createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk)); + createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); createReplacer(X86::NOT8r, X86::KNOTBrr); @@ -719,11 +709,12 @@ void X86DomainReassignment::initConverters() { // TODO: KTEST is not a replacement for TEST due to flag differences. Need // to prove only Z flag is used. 
- //createReplacer(X86::TEST8rr, X86::KTESTBrr); - //createReplacer(X86::TEST16rr, X86::KTESTWrr); + // createReplacer(X86::TEST8rr, X86::KTESTBrr); + // createReplacer(X86::TEST16rr, X86::KTESTWrr); createReplacer(X86::XOR8rr, X86::KXORBrr); } +#undef GET_EGPR_IF_ENABLED } bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 0ba31e173a1a7..efbc0e119060f 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3046,22 +3046,24 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic."); +#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC) case Intrinsic::x86_sse42_crc32_32_8: - Opc = X86::CRC32r32r8; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_16: - Opc = X86::CRC32r32r16; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_32: - Opc = X86::CRC32r32r32; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_64_64: - Opc = X86::CRC32r64r64; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64); RC = &X86::GR64RegClass; break; +#undef GET_EGPR_IF_ENABLED } const Value *LHS = II->getArgOperand(0); diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 699e5847e63fb..b1be4739617df 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode, HasINVPCID]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode, HasINVPCID]>; + 
Requires<[In64BitMode]>; def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; } // SchedRW -let Predicates = [In64BitMode, HasINVPCID] in { +let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in { // The instruction can only use a 64 bit register as the register argument // in 64 bit mode, while the intrinsic only accepts a 32 bit argument // corresponding to it. @@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in { addr:$src2)>; } +let Predicates = [In64BitMode, HasINVPCID, HasEGPR] in { + def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2), + (INVPCID64_EVEX + (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit), + addr:$src2)>; +} + //===----------------------------------------------------------------------===// // SMAP Instruction diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll index 056d79f379fd1..873986e99777d 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll @@ -3,8 +3,9 @@ ; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86 ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86 -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown 
-mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c @@ -21,9 +22,15 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind { ; ; X64-LABEL: test_mm_crc32_u8: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32b %sil, %eax -; X64-NEXT: retq +; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X64-NEXT: crc32b %sil, %eax # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc32_u8: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %trunc = trunc i32 %a1 to i8 %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc) ret i32 %res @@ -41,9 +48,15 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind { ; ; X64-LABEL: test_mm_crc32_u16: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32w %si, %eax -; X64-NEXT: retq +; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X64-NEXT: crc32w %si, %eax # encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc32_u16: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %trunc = trunc i32 %a1 to i16 %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc) ret i32 %res @@ -59,9 +72,15 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind { ; ; X64-LABEL: test_mm_crc32_u32: ; X64: # %bb.0: -; 
X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32l %esi, %eax -; X64-NEXT: retq +; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X64-NEXT: crc32l %esi, %eax # encoding: [0xf2,0x0f,0x38,0xf1,0xc6] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc32_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) ret i32 %res } diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll index e0ec432b38549..71d955bda7523 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c @@ -8,9 +9,15 @@ define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{ ; CHECK-LABEL: test_mm_crc64_u8: ; CHECK: # %bb.0: -; CHECK-NEXT: crc32b %sil, %edi -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: retq +; CHECK-NEXT: crc32b %sil, %edi # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe] +; CHECK-NEXT: movl %edi, %eax # encoding: 
[0x89,0xf8] +; CHECK-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc64_u8: +; EGPR: # %bb.0: +; EGPR-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe] +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %trunc = trunc i32 %a1 to i8 %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc) ret i64 %res @@ -20,9 +27,15 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ ; CHECK-LABEL: test_mm_crc64_u64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: crc32q %rsi, %rax -; CHECK-NEXT: retq +; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6] +; CHECK-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc64_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) ret i64 %res } diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll index 7623ba68353e1..84c7f90cfe3c3 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { ; X86-LABEL: crc32_32_8: @@ -14,6 +15,12 @@ define i32 
@crc32_32_8(i32 %a, i8 %b) nounwind { ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] ; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_32_8: +; EGPR: ## %bb.0: +; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) ret i32 %tmp } @@ -31,6 +38,12 @@ define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] ; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_32_16: +; EGPR: ## %bb.0: +; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; EGPR-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) ret i32 %tmp } @@ -48,6 +61,12 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] ; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_32_32: +; EGPR: ## %bb.0: +; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; EGPR-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) ret i32 %tmp } diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll index b0f7a394f07b7..bda26a15b277a 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck 
%s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind @@ -10,6 +11,12 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] ; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_64_8: +; EGPR: ## %bb.0: +; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) ret i64 %tmp } @@ -20,6 +27,12 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] ; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_64_64: +; EGPR: ## %bb.0: +; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) ret i64 %tmp } diff --git a/llvm/test/CodeGen/X86/invpcid-intrinsic.ll b/llvm/test/CodeGen/X86/invpcid-intrinsic.ll index 3aa9fde35e23f..19a6249fc708f 100644 --- a/llvm/test/CodeGen/X86/invpcid-intrinsic.ll +++ b/llvm/test/CodeGen/X86/invpcid-intrinsic.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+invpcid | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid | FileCheck %s --check-prefix=X86_64 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid --show-mc-encoding | FileCheck %s --check-prefix=X86_64 +; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR define void @test_invpcid(i32 %type, ptr %descriptor) { ; X86-LABEL: test_invpcid: @@ -12,9 +13,15 @@ define void @test_invpcid(i32 %type, ptr %descriptor) { ; ; X86_64-LABEL: test_invpcid: ; X86_64: # %bb.0: # %entry -; X86_64-NEXT: movl %edi, %eax -; X86_64-NEXT: invpcid (%rsi), %rax -; X86_64-NEXT: retq +; X86_64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X86_64-NEXT: invpcid (%rsi), %rax # encoding: [0x66,0x0f,0x38,0x82,0x06] +; X86_64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_invpcid: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.invpcid(i32 %type, ptr %descriptor) ret void @@ -31,9 +38,15 @@ define void @test_invpcid2(ptr readonly %type, ptr %descriptor) { ; ; X86_64-LABEL: test_invpcid2: ; X86_64: # %bb.0: # %entry -; X86_64-NEXT: movl (%rdi), %eax -; X86_64-NEXT: invpcid (%rsi), %rax -; X86_64-NEXT: retq +; X86_64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] +; X86_64-NEXT: invpcid (%rsi), %rax # encoding: [0x66,0x0f,0x38,0x82,0x06] +; X86_64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_invpcid2: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] +; EGPR-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load i32, ptr %type, align 4 tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1 diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll index 1769bcbf6f605..4d03510ad5d4f 100644 --- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll +++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll @@ -1,19 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < 
%s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+movdiri -mattr=+movdir64b | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b --show-mc-encoding | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR define void @test_movdiri(ptr %p, i32 %v) { -; X64-LABEL: test_movdiri: -; X64: # %bb.0: # %entry -; X64-NEXT: movdiri %esi, (%rdi) -; X64-NEXT: retq -; ; X32-LABEL: test_movdiri: ; X32: # %bb.0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movdiri %eax, (%ecx) ; X32-NEXT: retl +; +; X64-LABEL: test_movdiri: +; X64: # %bb.0: # %entry +; X64-NEXT: movdiri %esi, (%rdi) # encoding: [0x0f,0x38,0xf9,0x37] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_movdiri: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.directstore32(ptr %p, i32 %v) ret void @@ -22,17 +28,22 @@ entry: declare void @llvm.x86.directstore32(ptr, i32) define void @test_movdir64b(ptr %dst, ptr %src) { -; X64-LABEL: test_movdir64b: -; X64: # %bb.0: # %entry -; X64-NEXT: movdir64b (%rsi), %rdi -; X64-NEXT: retq -; ; X32-LABEL: test_movdir64b: ; X32: # %bb.0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movdir64b (%eax), %ecx ; X32-NEXT: retl +; +; X64-LABEL: test_movdir64b: +; X64: # %bb.0: # %entry +; X64-NEXT: movdir64b (%rsi), %rdi # encoding: [0x66,0x0f,0x38,0xf8,0x3e] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_movdir64b: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e] +; EGPR-NEXT: retq # 
encoding: [0xc3] entry: call void @llvm.x86.movdir64b(ptr %dst, ptr %src) ret void diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll index b20d7df26515d..ddd44f6d73d59 100644 --- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll +++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR define void @test_movdiri(ptr %p, i64 %v) { ; CHECK-LABEL: test_movdiri: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movdiri %rsi, (%rdi) -; CHECK-NEXT: retq +; CHECK-NEXT: movdiri %rsi, (%rdi) # encoding: [0x48,0x0f,0x38,0xf9,0x37] +; CHECK-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_movdiri: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.directstore64(ptr %p, i64 %v) ret void diff --git a/llvm/test/CodeGen/X86/sha.ll b/llvm/test/CodeGen/X86/sha.ll index d5427556dc0bb..d8fa354a39135 100644 --- a/llvm/test/CodeGen/X86/sha.ll +++ b/llvm/test/CodeGen/X86/sha.ll @@ -1,24 +1,45 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown 
--show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=EGPR declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1rnds4rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1rnds4 $3, %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1rnds4rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1rnds4rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x0f,0x3a,0xcc,0xc1,0x03] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1rnds4rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3) ret <4 x i32> %0 } define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1rnds4rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1rnds4rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1rnds4rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1rnds4rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) @@ -28,20 +49,40 @@ entry: declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1nexterr: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: sha1nexte %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1nexterr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1nexte %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1nexterr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc8,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1nexterr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1nexterm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1nexte (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1nexterm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1nexte (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1nexterm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc8,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1nexterm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0) @@ -51,20 +92,40 @@ entry: declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg1rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg1 %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1msg1rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1msg1 %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1msg1rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc9,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: 
test_sha1msg1rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg1rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg1 (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1msg1rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1msg1 (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1msg1rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc9,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1msg1rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0) @@ -74,20 +135,40 @@ entry: declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg2rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg2 %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1msg2rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1msg2 %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1msg2rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xca,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1msg2rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg2rm: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: sha1msg2 (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1msg2rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1msg2 (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1msg2rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xca,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1msg2rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0) @@ -107,11 +188,19 @@ define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) n ; ; AVX-LABEL: test_sha256rnds2rr: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovaps %xmm0, %xmm3 -; AVX-NEXT: vmovaps %xmm2, %xmm0 -; AVX-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 -; AVX-NEXT: vmovaps %xmm3, %xmm0 -; AVX-NEXT: retq +; AVX-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8] +; AVX-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] +; AVX-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x0f,0x38,0xcb,0xd9] +; AVX-NEXT: vmovaps %xmm3, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc3] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256rnds2rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] +; EGPR-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2] +; EGPR-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9] +; EGPR-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) ret <4 x i32> %0 @@ -128,11 +217,19 @@ define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwin ; ; AVX-LABEL: test_sha256rnds2rm: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovaps %xmm0, %xmm2 -; AVX-NEXT: 
vmovaps %xmm1, %xmm0 -; AVX-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 -; AVX-NEXT: vmovaps %xmm2, %xmm0 -; AVX-NEXT: retq +; AVX-NEXT: vmovaps %xmm0, %xmm2 # encoding: [0xc5,0xf8,0x28,0xd0] +; AVX-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1] +; AVX-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x0f,0x38,0xcb,0x17] +; AVX-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256rnds2rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0] +; EGPR-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] +; EGPR-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17] +; EGPR-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c) @@ -142,20 +239,40 @@ entry: declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg1rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg1 %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha256msg1rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha256msg1 %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha256msg1rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcc,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256msg1rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg1rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg1 
(%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha256msg1rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha256msg1 (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha256msg1rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha256msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcc,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256msg1rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0) @@ -165,20 +282,40 @@ entry: declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg2rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg2 %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha256msg2rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha256msg2 %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha256msg2rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha256msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcd,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256msg2rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg2rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg2 (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha256msg2rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha256msg2 (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha256msg2rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha256msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcd,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: 
test_sha256msg2rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0) @@ -195,12 +332,20 @@ define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwta ; ; AVX-LABEL: test_sha1rnds4_zero_extend: ; AVX: # %bb.0: # %entry -; AVX-NEXT: sha1rnds4 $3, (%rdi), %xmm0 -; AVX-NEXT: vmovaps %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03] +; AVX-NEXT: vmovaps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc0] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1rnds4_zero_extend: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03] +; EGPR-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> ret <8 x i32> %2 } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll b/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll index 4c28c8ab43699..bf87ae5cac05a 100644 --- a/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll +++ b/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR define void @test_incsspd(i32 %a) local_unnamed_addr { ; CHECK-LABEL: test_incsspd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: incsspd %edi -; CHECK-NEXT: retq +; CHECK-NEXT: incsspd %edi ## encoding: [0xf3,0x0f,0xae,0xef] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_incsspd: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: incsspd %edi ## encoding: [0xf3,0x0f,0xae,0xef] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.incsspd(i32 %a) ret void @@ -16,9 +22,15 @@ declare void @llvm.x86.incsspd(i32) define void @test_incsspq(i32 %a) local_unnamed_addr { ; CHECK-LABEL: test_incsspq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movslq %edi, %rax -; CHECK-NEXT: incsspq %rax -; CHECK-NEXT: retq +; CHECK-NEXT: movslq %edi, %rax ## encoding: [0x48,0x63,0xc7] +; CHECK-NEXT: incsspq %rax ## encoding: [0xf3,0x48,0x0f,0xae,0xe8] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_incsspq: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: movslq %edi, %rax ## encoding: [0x48,0x63,0xc7] +; EGPR-NEXT: incsspq %rax ## encoding: [0xf3,0x48,0x0f,0xae,0xe8] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: %conv.i = sext i32 %a to i64 tail call void @llvm.x86.incsspq(i64 %conv.i) @@ -30,9 +42,15 @@ declare void @llvm.x86.incsspq(i64) define i32 @test_rdsspd(i32 %a) { ; CHECK-LABEL: 
test_rdsspd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: rdsspd %eax -; CHECK-NEXT: retq +; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; CHECK-NEXT: rdsspd %eax ## encoding: [0xf3,0x0f,0x1e,0xc8] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_rdsspd: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; EGPR-NEXT: rdsspd %eax ## encoding: [0xf3,0x0f,0x1e,0xc8] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: %0 = call i32 @llvm.x86.rdsspd(i32 %a) ret i32 %0 @@ -43,9 +61,15 @@ declare i32 @llvm.x86.rdsspd(i32) define i64 @test_rdsspq(i64 %a) { ; CHECK-LABEL: test_rdsspq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: rdsspq %rax -; CHECK-NEXT: retq +; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: rdsspq %rax ## encoding: [0xf3,0x48,0x0f,0x1e,0xc8] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_rdsspq: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: rdsspq %rax ## encoding: [0xf3,0x48,0x0f,0x1e,0xc8] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: %0 = call i64 @llvm.x86.rdsspq(i64 %a) ret i64 %0 @@ -56,8 +80,13 @@ declare i64 @llvm.x86.rdsspq(i64) define void @test_saveprevssp() { ; CHECK-LABEL: test_saveprevssp: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: saveprevssp -; CHECK-NEXT: retq +; CHECK-NEXT: saveprevssp ## encoding: [0xf3,0x0f,0x01,0xea] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_saveprevssp: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: saveprevssp ## encoding: [0xf3,0x0f,0x01,0xea] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.saveprevssp() ret void @@ -68,8 +97,13 @@ declare void @llvm.x86.saveprevssp() define void @test_rstorssp(ptr %__p) { ; CHECK-LABEL: test_rstorssp: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: rstorssp (%rdi) -; CHECK-NEXT: retq +; CHECK-NEXT: rstorssp (%rdi) ## encoding: 
[0xf3,0x0f,0x01,0x2f] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_rstorssp: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: rstorssp (%rdi) ## encoding: [0xf3,0x0f,0x01,0x2f] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.rstorssp(ptr %__p) ret void @@ -80,8 +114,13 @@ declare void @llvm.x86.rstorssp(ptr) define void @test_wrssd(i32 %a, ptr %__p) { ; CHECK-LABEL: test_wrssd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrssd %edi, (%rsi) -; CHECK-NEXT: retq +; CHECK-NEXT: wrssd %edi, (%rsi) ## encoding: [0x0f,0x38,0xf6,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_wrssd: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.wrssd(i32 %a, ptr %__p) ret void @@ -92,8 +131,13 @@ declare void @llvm.x86.wrssd(i32, ptr) define void @test_wrssq(i64 %a, ptr %__p) { ; CHECK-LABEL: test_wrssq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrssq %rdi, (%rsi) -; CHECK-NEXT: retq +; CHECK-NEXT: wrssq %rdi, (%rsi) ## encoding: [0x48,0x0f,0x38,0xf6,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_wrssq: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.wrssq(i64 %a, ptr %__p) ret void @@ -104,8 +148,13 @@ declare void @llvm.x86.wrssq(i64, ptr) define void @test_wrussd(i32 %a, ptr %__p) { ; CHECK-LABEL: test_wrussd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrussd %edi, (%rsi) -; CHECK-NEXT: retq +; CHECK-NEXT: wrussd %edi, (%rsi) ## encoding: [0x66,0x0f,0x38,0xf5,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_wrussd: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.wrussd(i32 %a, ptr %__p) ret void @@ 
-116,8 +165,13 @@ declare void @llvm.x86.wrussd(i32, ptr) define void @test_wrussq(i64 %a, ptr %__p) { ; CHECK-LABEL: test_wrussq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrussq %rdi, (%rsi) -; CHECK-NEXT: retq +; CHECK-NEXT: wrussq %rdi, (%rsi) ## encoding: [0x66,0x48,0x0f,0x38,0xf5,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_wrussq: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.wrussq(i64 %a, ptr %__p) ret void @@ -128,8 +182,13 @@ declare void @llvm.x86.wrussq(i64, ptr) define void @test_setssbsy() { ; CHECK-LABEL: test_setssbsy: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: setssbsy -; CHECK-NEXT: retq +; CHECK-NEXT: setssbsy ## encoding: [0xf3,0x0f,0x01,0xe8] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_setssbsy: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: setssbsy ## encoding: [0xf3,0x0f,0x01,0xe8] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.setssbsy() ret void @@ -140,8 +199,13 @@ declare void @llvm.x86.setssbsy() define void @test_clrssbsy(ptr %__p) { ; CHECK-LABEL: test_clrssbsy: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: clrssbsy (%rdi) -; CHECK-NEXT: retq +; CHECK-NEXT: clrssbsy (%rdi) ## encoding: [0xf3,0x0f,0xae,0x37] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_clrssbsy: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: clrssbsy (%rdi) ## encoding: [0xf3,0x0f,0xae,0x37] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.clrssbsy(ptr %__p) ret void