Skip to content

Commit de55a28

Browse files
authored
[X86][MC] Support Enc/Dec for EGPR for promoted BMI instructions (#73899)
R16-R31 were added to the GPRs in #70958. This patch adds encoding/decoding support for the promoted BMI instructions in EVEX space. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
1 parent 5fe7ae8 commit de55a28

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+942
-110
lines changed

llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,10 +1115,10 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
11151115
case X86II::MRMSrcMem4VOp3: {
11161116
// Instruction format for 4VOp3:
11171117
// src1(ModR/M), MemAddr, src3(VEX_4V)
1118-
Prefix.setR(MI, CurOp++);
1118+
Prefix.setRR2(MI, CurOp++);
11191119
Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
11201120
Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
1121-
Prefix.set4V(MI, CurOp + X86::AddrNumOperands);
1121+
Prefix.set4VV2(MI, CurOp + X86::AddrNumOperands);
11221122
break;
11231123
}
11241124
case X86II::MRMSrcMemOp4: {
@@ -1189,7 +1189,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
11891189
// src1(ModR/M), src2(ModR/M), src3(VEX_4V)
11901190
Prefix.setRR2(MI, CurOp++);
11911191
Prefix.setBB2(MI, CurOp++);
1192-
Prefix.set4V(MI, CurOp++);
1192+
Prefix.set4VV2(MI, CurOp++);
11931193
break;
11941194
}
11951195
case X86II::MRMSrcRegOp4: {

llvm/lib/Target/X86/X86InstrArithmetic.td

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,21 +1289,34 @@ def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
12891289
//
12901290
multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
12911291
PatFrag ld_frag, X86FoldableSchedWrite sched> {
1292+
let Predicates = [HasBMI, NoEGPR] in {
12921293
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
1293-
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1294-
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
1295-
Sched<[sched]>;
1294+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1295+
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
1296+
VEX_4V, Sched<[sched]>;
12961297
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
1297-
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1298-
[(set RC:$dst, EFLAGS,
1299-
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
1300-
Sched<[sched.Folded, sched.ReadAfterFold]>;
1298+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1299+
[(set RC:$dst, EFLAGS,
1300+
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
1301+
VEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
1302+
}
1303+
let Predicates = [HasBMI, HasEGPR, In64BitMode] in {
1304+
def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
1305+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1306+
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
1307+
EVEX_4V, Sched<[sched]>;
1308+
def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
1309+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1310+
[(set RC:$dst, EFLAGS,
1311+
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
1312+
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
1313+
}
13011314
}
13021315

13031316
// Complexity is reduced to give and with immediate a chance to match first.
1304-
let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
1305-
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V;
1306-
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, REX_W;
1317+
let Defs = [EFLAGS], AddedComplexity = -6 in {
1318+
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS;
1319+
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, REX_W;
13071320
}
13081321

13091322
let Predicates = [HasBMI], AddedComplexity = -6 in {
@@ -1323,6 +1336,7 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
13231336
multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
13241337
X86FoldableSchedWrite sched> {
13251338
let hasSideEffects = 0 in {
1339+
let Predicates = [HasBMI2, NoEGPR] in {
13261340
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
13271341
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
13281342
[]>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>;
@@ -1346,15 +1360,27 @@ let hasSideEffects = 0 in {
13461360
def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src),
13471361
[]>, Sched<[sched.Folded]>;
13481362
}
1363+
let Predicates = [HasBMI2, HasEGPR, In64BitMode] in
1364+
def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
1365+
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
1366+
[]>, T8XD, EVEX_4V, Sched<[WriteIMulH, sched]>;
1367+
let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in
1368+
def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
1369+
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
1370+
[]>, T8XD, EVEX_4V,
1371+
Sched<[WriteIMulHLd, sched.Folded,
1372+
// Memory operand.
1373+
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
1374+
// Implicit read of EDX/RDX
1375+
sched.ReadAfterFold]>;
13491376
}
1350-
1351-
let Predicates = [HasBMI2] in {
1352-
let Uses = [EDX] in
1353-
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
1354-
let Uses = [RDX] in
1355-
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
13561377
}
13571378

1379+
let Uses = [EDX] in
1380+
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
1381+
let Uses = [RDX] in
1382+
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
1383+
13581384
//===----------------------------------------------------------------------===//
13591385
// ADCX and ADOX Instructions
13601386
//

llvm/lib/Target/X86/X86InstrMisc.td

Lines changed: 83 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,19 +1214,19 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
12141214

12151215
multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
12161216
RegisterClass RC, X86MemOperand x86memop,
1217-
X86FoldableSchedWrite sched> {
1217+
X86FoldableSchedWrite sched, string Suffix = ""> {
12181218
let hasSideEffects = 0 in {
1219-
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
1220-
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
1221-
T8PS, VEX_4V, Sched<[sched]>;
1219+
def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
1220+
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
1221+
T8PS, VEX_4V, Sched<[sched]>;
12221222
let mayLoad = 1 in
1223-
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
1224-
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
1225-
T8PS, VEX_4V, Sched<[sched.Folded]>;
1223+
def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
1224+
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
1225+
T8PS, VEX_4V, Sched<[sched.Folded]>;
12261226
}
12271227
}
12281228

1229-
let Predicates = [HasBMI], Defs = [EFLAGS] in {
1229+
let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
12301230
defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>;
12311231
defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W;
12321232
defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>;
@@ -1235,6 +1235,15 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
12351235
defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W;
12361236
}
12371237

1238+
let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
1239+
defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
1240+
defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
1241+
defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
1242+
defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
1243+
defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
1244+
defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
1245+
}
1246+
12381247
//===----------------------------------------------------------------------===//
12391248
// Pattern fragments to auto generate BMI instructions.
12401249
//===----------------------------------------------------------------------===//
@@ -1292,56 +1301,50 @@ let Predicates = [HasBMI] in {
12921301
(BLSI64rr GR64:$src)>;
12931302
}
12941303

1295-
multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
1296-
X86MemOperand x86memop, SDNode OpNode,
1297-
PatFrag ld_frag, X86FoldableSchedWrite Sched> {
1298-
def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
1299-
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1300-
[(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
1301-
T8PS, VEX, Sched<[Sched]>;
1302-
def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
1303-
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1304-
[(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
1305-
(implicit EFLAGS)]>, T8PS, VEX,
1306-
Sched<[Sched.Folded,
1307-
// x86memop:$src1
1308-
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
1309-
ReadDefault,
1310-
// RC:$src2
1311-
Sched.ReadAfterFold]>;
1304+
multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC,
1305+
X86MemOperand x86memop, SDPatternOperator OpNode,
1306+
PatFrag ld_frag, X86FoldableSchedWrite Sched,
1307+
string Suffix = ""> {
1308+
def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
1309+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1310+
[(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
1311+
T8PS, VEX, Sched<[Sched]>;
1312+
let mayLoad = 1 in
1313+
def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
1314+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1315+
[(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
1316+
(implicit EFLAGS)]>, T8PS, VEX,
1317+
Sched<[Sched.Folded,
1318+
// x86memop:$src1
1319+
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
1320+
ReadDefault,
1321+
// RC:$src2
1322+
Sched.ReadAfterFold]>;
13121323
}
13131324

1314-
let Predicates = [HasBMI], Defs = [EFLAGS] in {
1315-
defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem,
1316-
X86bextr, loadi32, WriteBEXTR>;
1317-
defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem,
1318-
X86bextr, loadi64, WriteBEXTR>, REX_W;
1319-
}
1320-
1321-
multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
1322-
X86MemOperand x86memop, SDNode Int,
1323-
PatFrag ld_frag, X86FoldableSchedWrite Sched> {
1324-
def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
1325-
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1326-
[(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
1327-
T8PS, VEX, Sched<[Sched]>;
1328-
def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
1329-
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1330-
[(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
1331-
(implicit EFLAGS)]>, T8PS, VEX,
1332-
Sched<[Sched.Folded,
1333-
// x86memop:$src1
1334-
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
1335-
ReadDefault,
1336-
// RC:$src2
1337-
Sched.ReadAfterFold]>;
1338-
}
1339-
1340-
let Predicates = [HasBMI2], Defs = [EFLAGS] in {
1341-
defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
1342-
X86bzhi, loadi32, WriteBZHI>;
1343-
defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
1344-
X86bzhi, loadi64, WriteBZHI>, REX_W;
1325+
let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
1326+
defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
1327+
X86bextr, loadi32, WriteBEXTR>;
1328+
defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
1329+
X86bextr, loadi64, WriteBEXTR>, REX_W;
1330+
}
1331+
let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in {
1332+
defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
1333+
X86bzhi, loadi32, WriteBZHI>;
1334+
defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
1335+
X86bzhi, loadi64, WriteBZHI>, REX_W;
1336+
}
1337+
let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
1338+
defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
1339+
X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX;
1340+
defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
1341+
X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W;
1342+
}
1343+
let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in {
1344+
defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
1345+
X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX;
1346+
defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
1347+
X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W;
13451348
}
13461349

13471350
def CountTrailingOnes : SDNodeXForm<imm, [{
@@ -1383,19 +1386,19 @@ let Predicates = [HasBMI2, NoTBM] in {
13831386
}
13841387

13851388
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
1386-
X86MemOperand x86memop, SDNode OpNode,
1387-
PatFrag ld_frag> {
1388-
def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
1389-
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1390-
[(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
1391-
VEX_4V, Sched<[WriteALU]>;
1392-
def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
1393-
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1394-
[(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
1395-
VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
1396-
}
1397-
1398-
let Predicates = [HasBMI2] in {
1389+
X86MemOperand x86memop, SDPatternOperator OpNode,
1390+
PatFrag ld_frag, string Suffix = ""> {
1391+
def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
1392+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1393+
[(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
1394+
VEX_4V, Sched<[WriteALU]>;
1395+
def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
1396+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1397+
[(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
1398+
VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
1399+
}
1400+
1401+
let Predicates = [HasBMI2, NoEGPR] in {
13991402
defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
14001403
X86pdep, loadi32>, T8XD;
14011404
defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
@@ -1406,6 +1409,17 @@ let Predicates = [HasBMI2] in {
14061409
X86pext, loadi64>, T8XS, REX_W;
14071410
}
14081411

1412+
let Predicates = [HasBMI2, HasEGPR] in {
1413+
defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
1414+
X86pdep, loadi32, "_EVEX">, T8XD, EVEX;
1415+
defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
1416+
X86pdep, loadi64, "_EVEX">, T8XD, REX_W, EVEX;
1417+
defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
1418+
X86pext, loadi32, "_EVEX">, T8XS, EVEX;
1419+
defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
1420+
X86pext, loadi64, "_EVEX">, T8XS, REX_W, EVEX;
1421+
}
1422+
14091423
//===----------------------------------------------------------------------===//
14101424
// Lightweight Profiling Instructions
14111425

llvm/lib/Target/X86/X86InstrShiftRotate.td

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -824,38 +824,40 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{
824824

825825
// NOTE: We use WriteShift for these rotates as they avoid the stalls
826826
// of many of the older x86 rotate instructions.
827-
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
827+
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop,
828+
string Suffix = ""> {
828829
let hasSideEffects = 0 in {
829-
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
830-
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
831-
[]>, TAXD, VEX, Sched<[WriteShift]>;
830+
def ri#Suffix : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
831+
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
832+
TAXD, VEX, Sched<[WriteShift]>;
832833
let mayLoad = 1 in
833-
def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
834-
(ins x86memop:$src1, u8imm:$src2),
835-
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
836-
[]>, TAXD, VEX, Sched<[WriteShiftLd]>;
834+
def mi#Suffix : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
835+
(ins x86memop:$src1, u8imm:$src2),
836+
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
837+
TAXD, VEX, Sched<[WriteShiftLd]>;
837838
}
838839
}
839840

840-
multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
841+
multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop,
842+
string Suffix = ""> {
841843
let hasSideEffects = 0 in {
842-
def rr : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
843-
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
844-
VEX, Sched<[WriteShift]>;
844+
def rr#Suffix : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
845+
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
846+
VEX, Sched<[WriteShift]>;
845847
let mayLoad = 1 in
846-
def rm : I<0xF7, MRMSrcMem4VOp3,
847-
(outs RC:$dst), (ins x86memop:$src1, RC:$src2),
848-
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
849-
VEX, Sched<[WriteShift.Folded,
850-
// x86memop:$src1
851-
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
852-
ReadDefault,
853-
// RC:$src2
854-
WriteShift.ReadAfterFold]>;
848+
def rm#Suffix : I<0xF7, MRMSrcMem4VOp3,
849+
(outs RC:$dst), (ins x86memop:$src1, RC:$src2),
850+
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
851+
VEX, Sched<[WriteShift.Folded,
852+
// x86memop:$src1
853+
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
854+
ReadDefault,
855+
// RC:$src2
856+
WriteShift.ReadAfterFold]>;
855857
}
856858
}
857859

858-
let Predicates = [HasBMI2] in {
860+
let Predicates = [HasBMI2, NoEGPR] in {
859861
defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>;
860862
defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, REX_W;
861863
defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS;
@@ -864,7 +866,20 @@ let Predicates = [HasBMI2] in {
864866
defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, REX_W;
865867
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD;
866868
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, REX_W;
869+
}
867870

871+
let Predicates = [HasBMI2, HasEGPR] in {
872+
defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem, "_EVEX">, EVEX;
873+
defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem, "_EVEX">, REX_W, EVEX;
874+
defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8XS, EVEX;
875+
defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem, "_EVEX">, T8XS, REX_W, EVEX;
876+
defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem, "_EVEX">, T8XD, EVEX;
877+
defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem, "_EVEX">, T8XD, REX_W, EVEX;
878+
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem, "_EVEX">, T8PD, EVEX;
879+
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8PD, REX_W, EVEX;
880+
}
881+
882+
let Predicates = [HasBMI2] in {
868883
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
869884
let AddedComplexity = 10 in {
870885
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),

0 commit comments

Comments
 (0)