Skip to content

[X86][MC] Support Enc/Dec for EGPR for promoted BMI instructions #73899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1115,10 +1115,10 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
case X86II::MRMSrcMem4VOp3: {
// Instruction format for 4VOp3:
// src1(ModR/M), MemAddr, src3(VEX_4V)
Prefix.setR(MI, CurOp++);
Prefix.setRR2(MI, CurOp++);
Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
Prefix.set4V(MI, CurOp + X86::AddrNumOperands);
Prefix.set4VV2(MI, CurOp + X86::AddrNumOperands);
break;
}
case X86II::MRMSrcMemOp4: {
Expand Down Expand Up @@ -1189,7 +1189,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// src1(ModR/M), src2(ModR/M), src3(VEX_4V)
Prefix.setRR2(MI, CurOp++);
Prefix.setBB2(MI, CurOp++);
Prefix.set4V(MI, CurOp++);
Prefix.set4VV2(MI, CurOp++);
break;
}
case X86II::MRMSrcRegOp4: {
Expand Down
58 changes: 42 additions & 16 deletions llvm/lib/Target/X86/X86InstrArithmetic.td
Original file line number Diff line number Diff line change
Expand Up @@ -1289,21 +1289,34 @@ def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
//
multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
PatFrag ld_frag, X86FoldableSchedWrite sched> {
let Predicates = [HasBMI, NoEGPR] in {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
Sched<[sched]>;
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
VEX_4V, Sched<[sched]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
VEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasBMI, HasEGPR, In64BitMode] in {
def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
EVEX_4V, Sched<[sched]>;
def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

// Complexity is reduced to give and with immediate a chance to match first.
let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V;
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, REX_W;
let Defs = [EFLAGS], AddedComplexity = -6 in {
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS;
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, REX_W;
}

let Predicates = [HasBMI], AddedComplexity = -6 in {
Expand All @@ -1323,6 +1336,7 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in {
let Predicates = [HasBMI2, NoEGPR] in {
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
[]>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>;
Expand All @@ -1346,15 +1360,27 @@ let hasSideEffects = 0 in {
def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src),
[]>, Sched<[sched.Folded]>;
}
let Predicates = [HasBMI2, HasEGPR, In64BitMode] in
def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
[]>, T8XD, EVEX_4V, Sched<[WriteIMulH, sched]>;
let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in
def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
[]>, T8XD, EVEX_4V,
Sched<[WriteIMulHLd, sched.Folded,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Implicit read of EDX/RDX
sched.ReadAfterFold]>;
}

let Predicates = [HasBMI2] in {
let Uses = [EDX] in
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
}

let Uses = [EDX] in
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;

//===----------------------------------------------------------------------===//
// ADCX and ADOX Instructions
//
Expand Down
152 changes: 83 additions & 69 deletions llvm/lib/Target/X86/X86InstrMisc.td
Original file line number Diff line number Diff line change
Expand Up @@ -1214,19 +1214,19 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {

multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
RegisterClass RC, X86MemOperand x86memop,
X86FoldableSchedWrite sched> {
X86FoldableSchedWrite sched, string Suffix = ""> {
let hasSideEffects = 0 in {
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
T8PS, VEX_4V, Sched<[sched]>;
def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
T8PS, VEX_4V, Sched<[sched]>;
let mayLoad = 1 in
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
T8PS, VEX_4V, Sched<[sched.Folded]>;
def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
T8PS, VEX_4V, Sched<[sched.Folded]>;
}
}

let Predicates = [HasBMI], Defs = [EFLAGS] in {
let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>;
defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W;
defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>;
Expand All @@ -1235,6 +1235,15 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W;
}

let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
}

//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate BMI instructions.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1292,56 +1301,50 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}

multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode,
PatFrag ld_frag, X86FoldableSchedWrite Sched> {
def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
T8PS, VEX, Sched<[Sched]>;
def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
(implicit EFLAGS)]>, T8PS, VEX,
Sched<[Sched.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
Sched.ReadAfterFold]>;
multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC,
X86MemOperand x86memop, SDPatternOperator OpNode,
PatFrag ld_frag, X86FoldableSchedWrite Sched,
string Suffix = ""> {
def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
T8PS, VEX, Sched<[Sched]>;
let mayLoad = 1 in
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why add mayLoad = 1 here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I tried to generate the MCA test earlier, the `mayLoad` attribute was missing unless we set it explicitly.

def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
(implicit EFLAGS)]>, T8PS, VEX,
Sched<[Sched.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
Sched.ReadAfterFold]>;
}

let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem,
X86bextr, loadi32, WriteBEXTR>;
defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem,
X86bextr, loadi64, WriteBEXTR>, REX_W;
}

multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
X86MemOperand x86memop, SDNode Int,
PatFrag ld_frag, X86FoldableSchedWrite Sched> {
def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
T8PS, VEX, Sched<[Sched]>;
def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
(implicit EFLAGS)]>, T8PS, VEX,
Sched<[Sched.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
Sched.ReadAfterFold]>;
}

let Predicates = [HasBMI2], Defs = [EFLAGS] in {
defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
X86bzhi, loadi32, WriteBZHI>;
defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
X86bzhi, loadi64, WriteBZHI>, REX_W;
let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
X86bextr, loadi32, WriteBEXTR>;
defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
X86bextr, loadi64, WriteBEXTR>, REX_W;
}
let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in {
defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
X86bzhi, loadi32, WriteBZHI>;
defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
X86bzhi, loadi64, WriteBZHI>, REX_W;
}
let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX;
defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W;
}
let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in {
defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX;
defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W;
}

def CountTrailingOnes : SDNodeXForm<imm, [{
Expand Down Expand Up @@ -1383,19 +1386,19 @@ let Predicates = [HasBMI2, NoTBM] in {
}

multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode,
PatFrag ld_frag> {
def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
VEX_4V, Sched<[WriteALU]>;
def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}

let Predicates = [HasBMI2] in {
X86MemOperand x86memop, SDPatternOperator OpNode,
PatFrag ld_frag, string Suffix = ""> {
def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
VEX_4V, Sched<[WriteALU]>;
def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}

let Predicates = [HasBMI2, NoEGPR] in {
defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
X86pdep, loadi32>, T8XD;
defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
Expand All @@ -1406,6 +1409,17 @@ let Predicates = [HasBMI2] in {
X86pext, loadi64>, T8XS, REX_W;
}

let Predicates = [HasBMI2, HasEGPR] in {
defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
X86pdep, loadi32, "_EVEX">, T8XD, EVEX;
defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
X86pdep, loadi64, "_EVEX">, T8XD, REX_W, EVEX;
defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
X86pext, loadi32, "_EVEX">, T8XS, EVEX;
defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
X86pext, loadi64, "_EVEX">, T8XS, REX_W, EVEX;
}

//===----------------------------------------------------------------------===//
// Lightweight Profiling Instructions

Expand Down
59 changes: 37 additions & 22 deletions llvm/lib/Target/X86/X86InstrShiftRotate.td
Original file line number Diff line number Diff line change
Expand Up @@ -824,38 +824,40 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{

// NOTE: We use WriteShift for these rotates as they avoid the stalls
// of many of the older x86 rotate instructions.
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop,
string Suffix = ""> {
let hasSideEffects = 0 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TAXD, VEX, Sched<[WriteShift]>;
def ri#Suffix : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
TAXD, VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TAXD, VEX, Sched<[WriteShiftLd]>;
def mi#Suffix : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
TAXD, VEX, Sched<[WriteShiftLd]>;
}
}

multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop,
string Suffix = ""> {
let hasSideEffects = 0 in {
def rr : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
VEX, Sched<[WriteShift]>;
def rr#Suffix : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
def rm : I<0xF7, MRMSrcMem4VOp3,
(outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
VEX, Sched<[WriteShift.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
WriteShift.ReadAfterFold]>;
def rm#Suffix : I<0xF7, MRMSrcMem4VOp3,
(outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
VEX, Sched<[WriteShift.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
WriteShift.ReadAfterFold]>;
}
}

let Predicates = [HasBMI2] in {
let Predicates = [HasBMI2, NoEGPR] in {
defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>;
defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, REX_W;
defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS;
Expand All @@ -864,7 +866,20 @@ let Predicates = [HasBMI2] in {
defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, REX_W;
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD;
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, REX_W;
}

let Predicates = [HasBMI2, HasEGPR] in {
defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem, "_EVEX">, EVEX;
defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem, "_EVEX">, REX_W, EVEX;
defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8XS, EVEX;
defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem, "_EVEX">, T8XS, REX_W, EVEX;
defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem, "_EVEX">, T8XD, EVEX;
defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem, "_EVEX">, T8XD, REX_W, EVEX;
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem, "_EVEX">, T8PD, EVEX;
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8PD, REX_W, EVEX;
}

let Predicates = [HasBMI2] in {
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
Expand Down
Loading