Skip to content

Commit a5902a4

Browse files
committed
[X86][NFC] Rename variables/passes for EVEX compression optimization
RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031 APX introduces EGPR, NDD and NF instructions. In addition to compressing EVEX encoded AVX512 instructions into VEX encoding, we also have several more possible optimizations. a. Promoted instruction (EVEX space) -> pre-promotion instruction (legacy space) b. NDD (EVEX space) -> non-NDD (legacy space) c. NF_ND (EVEX space) -> NF (EVEX space) The first two types of compression can usually reduce code size, while the third type of compression can help hardware decode although the instruction length remains unchanged. So we do the renaming for the upcoming APX optimizations. BTW, I clang-formatted the code in X86CompressEVEX.cpp and X86CompressEVEXTablesEmitter.cpp. This patch also extracts the NFC in llvm#77065 into a separate commit.
1 parent ba3ef33 commit a5902a4

File tree

9 files changed

+105
-93
lines changed

9 files changed

+105
-93
lines changed

llvm/lib/Target/X86/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ tablegen(LLVM X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
88
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
99
tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
1010
tablegen(LLVM X86GenDisassemblerTables.inc -gen-disassembler)
11-
tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
11+
tablegen(LLVM X86GenCompressEVEXTables.inc -gen-x86-compress-evex-tables)
1212
tablegen(LLVM X86GenExegesis.inc -gen-exegesis)
1313
tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
1414
tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
@@ -61,7 +61,7 @@ set(sources
6161
X86InstrFMA3Info.cpp
6262
X86InstrFoldTables.cpp
6363
X86InstrInfo.cpp
64-
X86EvexToVex.cpp
64+
X86CompressEVEX.cpp
6565
X86LoadValueInjectionLoadHardening.cpp
6666
X86LoadValueInjectionRetHardening.cpp
6767
X86MCInstLower.cpp

llvm/lib/Target/X86/X86.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,9 @@ FunctionPass *createX86FixupBWInsts();
131131
/// to another, when profitable.
132132
FunctionPass *createX86DomainReassignmentPass();
133133

134-
/// This pass replaces EVEX encoded of AVX-512 instructiosn by VEX
135-
/// encoding when possible in order to reduce code size.
136-
FunctionPass *createX86EvexToVexInsts();
134+
/// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space when
135+
/// possible in order to reduce code size or facilitate HW decoding.
136+
FunctionPass *createX86CompressEVEXPass();
137137

138138
/// This pass creates the thunks for the retpoline feature.
139139
FunctionPass *createX86IndirectThunksPass();
@@ -167,7 +167,7 @@ FunctionPass *createX86SpeculativeLoadHardeningPass();
167167
FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
168168
FunctionPass *createX86ArgumentStackSlotPass();
169169

170-
void initializeEvexToVexInstPassPass(PassRegistry &);
170+
void initializeCompressEVEXPassPass(PassRegistry &);
171171
void initializeFPSPass(PassRegistry &);
172172
void initializeFixupBWInstPassPass(PassRegistry &);
173173
void initializeFixupLEAPassPass(PassRegistry &);

llvm/lib/Target/X86/X86EvexToVex.cpp renamed to llvm/lib/Target/X86/X86CompressEVEX.cpp

Lines changed: 63 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,39 @@
1-
//===- X86EvexToVex.cpp ---------------------------------------------------===//
2-
// Compress EVEX instructions to VEX encoding when possible to reduce code size
1+
//===- X86CompressEVEX.cpp ------------------------------------------------===//
32
//
43
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
54
// See https://llvm.org/LICENSE.txt for license information.
65
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
76
//
87
//===----------------------------------------------------------------------===//
98
//
10-
/// \file
11-
/// This file defines the pass that goes over all AVX-512 instructions which
12-
/// are encoded using the EVEX prefix and if possible replaces them by their
13-
/// corresponding VEX encoding which is usually shorter by 2 bytes.
14-
/// EVEX instructions may be encoded via the VEX prefix when the AVX-512
15-
/// instruction has a corresponding AVX/AVX2 opcode, when vector length
16-
/// accessed by instruction is less than 512 bits and when it does not use
17-
// the xmm or the mask registers or xmm/ymm registers with indexes higher
18-
// than 15.
19-
/// The pass applies code reduction on the generated code for AVX-512 instrs.
9+
// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
10+
// when possible in order to reduce code size or facilitate HW decoding.
2011
//
12+
// Possible compression:
13+
// a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
14+
// b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy)
15+
// c. NDD (EVEX) -> non-NDD (legacy)
16+
// d. NF_ND (EVEX) -> NF (EVEX)
17+
//
18+
// Compression a, b and c usually reduce code size (with some exceptions). The
19+
// fourth type of compression can help hardware decode although the instruction
20+
// length remains unchanged.
21+
//
22+
// Compression a, b and c can always reduce code size, with some exceptions
23+
// such as promoted 16-bit CRC32 which is as long as the legacy version.
24+
//
25+
// legacy:
26+
// crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
27+
// promoted:
28+
// crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
29+
//
30+
// From a performance perspective, these should be the same (same uops and same EXE
31+
// ports). From a FMV perspective, an older legacy encoding is preferred b/c it
32+
// can execute in more places (broader HW install base). So we will still do
33+
// the compression.
34+
//
35+
// Compression d can help hardware decode (HW may skip reading the NDD
36+
// register) although the instruction length remains unchanged.
2137
//===----------------------------------------------------------------------===//
2238

2339
#include "MCTargetDesc/X86BaseInfo.h"
@@ -38,37 +54,34 @@
3854

3955
using namespace llvm;
4056

41-
// Including the generated EVEX2VEX tables.
42-
struct X86EvexToVexCompressTableEntry {
43-
uint16_t EvexOpc;
44-
uint16_t VexOpc;
57+
// Including the generated EVEX compression tables.
58+
struct X86CompressEVEXTableEntry {
59+
uint16_t OldOpc;
60+
uint16_t NewOpc;
4561

46-
bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
47-
return EvexOpc < RHS.EvexOpc;
62+
bool operator<(const X86CompressEVEXTableEntry &RHS) const {
63+
return OldOpc < RHS.OldOpc;
4864
}
4965

50-
friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
51-
unsigned Opc) {
52-
return TE.EvexOpc < Opc;
66+
friend bool operator<(const X86CompressEVEXTableEntry &TE, unsigned Opc) {
67+
return TE.OldOpc < Opc;
5368
}
5469
};
55-
#include "X86GenEVEX2VEXTables.inc"
70+
#include "X86GenCompressEVEXTables.inc"
5671

57-
#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
58-
#define EVEX2VEX_NAME "x86-evex-to-vex-compress"
72+
#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
73+
#define COMP_EVEX_NAME "x86-compress-evex"
5974

60-
#define DEBUG_TYPE EVEX2VEX_NAME
75+
#define DEBUG_TYPE COMP_EVEX_NAME
6176

6277
namespace {
6378

64-
class EvexToVexInstPass : public MachineFunctionPass {
79+
class CompressEVEXPass : public MachineFunctionPass {
6580
public:
6681
static char ID;
67-
EvexToVexInstPass() : MachineFunctionPass(ID) {}
68-
StringRef getPassName() const override { return EVEX2VEX_DESC; }
82+
CompressEVEXPass() : MachineFunctionPass(ID) {}
83+
StringRef getPassName() const override { return COMP_EVEX_DESC; }
6984

70-
/// Loop over all of the basic blocks, replacing EVEX instructions
71-
/// by equivalent VEX instructions when possible for reducing code size.
7285
bool runOnMachineFunction(MachineFunction &MF) override;
7386

7487
// This pass runs after regalloc and doesn't support VReg operands.
@@ -80,7 +93,7 @@ class EvexToVexInstPass : public MachineFunctionPass {
8093

8194
} // end anonymous namespace
8295

83-
char EvexToVexInstPass::ID = 0;
96+
char CompressEVEXPass::ID = 0;
8497

8598
static bool usesExtendedRegister(const MachineInstr &MI) {
8699
auto isHiRegIdx = [](unsigned Reg) {
@@ -112,8 +125,8 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
112125
return false;
113126
}
114127

115-
static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
116-
switch (EvexOpc) {
128+
static bool checkVEXInstPredicate(unsigned OldOpc, const X86Subtarget &ST) {
129+
switch (OldOpc) {
117130
default:
118131
return true;
119132
case X86::VCVTNEPS2BF16Z128rm:
@@ -151,15 +164,15 @@ static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
151164
}
152165

153166
// Do any custom cleanup needed to finalize the conversion.
154-
static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
155-
(void)VexOpc;
167+
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
168+
(void)NewOpc;
156169
unsigned Opc = MI.getOpcode();
157170
switch (Opc) {
158171
case X86::VALIGNDZ128rri:
159172
case X86::VALIGNDZ128rmi:
160173
case X86::VALIGNQZ128rri:
161174
case X86::VALIGNQZ128rmi: {
162-
assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
175+
assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
163176
"Unexpected new opcode!");
164177
unsigned Scale =
165178
(Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
@@ -175,8 +188,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
175188
case X86::VSHUFI32X4Z256rri:
176189
case X86::VSHUFI64X2Z256rmi:
177190
case X86::VSHUFI64X2Z256rri: {
178-
assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
179-
VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
191+
assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
192+
NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
180193
"Unexpected new opcode!");
181194
MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
182195
int64_t ImmVal = Imm.getImm();
@@ -200,7 +213,7 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
200213
case X86::VRNDSCALESDZm_Int:
201214
case X86::VRNDSCALESSZr_Int:
202215
case X86::VRNDSCALESSZm_Int:
203-
const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
216+
const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
204217
int64_t ImmVal = Imm.getImm();
205218
// Ensure that only bits 3:0 of the immediate are used.
206219
if ((ImmVal & 0xf) != ImmVal)
@@ -239,28 +252,28 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
239252
return false;
240253

241254
// Use the VEX.L bit to select the 128 or 256-bit table.
242-
ArrayRef<X86EvexToVexCompressTableEntry> Table =
255+
ArrayRef<X86CompressEVEXTableEntry> Table =
243256
(Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
244257
: ArrayRef(X86EvexToVex128CompressTable);
245258

246-
unsigned EvexOpc = MI.getOpcode();
247-
const auto *I = llvm::lower_bound(Table, EvexOpc);
248-
if (I == Table.end() || I->EvexOpc != EvexOpc)
259+
unsigned Opc = MI.getOpcode();
260+
const auto *I = llvm::lower_bound(Table, Opc);
261+
if (I == Table.end() || I->OldOpc != Opc)
249262
return false;
250263

251264
if (usesExtendedRegister(MI))
252265
return false;
253-
if (!checkVEXInstPredicate(EvexOpc, ST))
266+
if (!checkVEXInstPredicate(Opc, ST))
254267
return false;
255-
if (!performCustomAdjustments(MI, I->VexOpc))
268+
if (!performCustomAdjustments(MI, I->NewOpc))
256269
return false;
257270

258-
MI.setDesc(ST.getInstrInfo()->get(I->VexOpc));
271+
MI.setDesc(ST.getInstrInfo()->get(I->NewOpc));
259272
MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
260273
return true;
261274
}
262275

263-
bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
276+
bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
264277
#ifndef NDEBUG
265278
// Make sure the tables are sorted.
266279
static std::atomic<bool> TableChecked(false);
@@ -289,8 +302,8 @@ bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
289302
return Changed;
290303
}
291304

292-
INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
305+
INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false)
293306

294-
FunctionPass *llvm::createX86EvexToVexInsts() {
295-
return new EvexToVexInstPass();
307+
FunctionPass *llvm::createX86CompressEVEXPass() {
308+
return new CompressEVEXPass();
296309
}

llvm/lib/Target/X86/X86TargetMachine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
7575
initializeGlobalISel(PR);
7676
initializeWinEHStatePassPass(PR);
7777
initializeFixupBWInstPassPass(PR);
78-
initializeEvexToVexInstPassPass(PR);
78+
initializeCompressEVEXPassPass(PR);
7979
initializeFixupLEAPassPass(PR);
8080
initializeFPSPass(PR);
8181
initializeX86FixupSetCCPassPass(PR);
@@ -575,7 +575,7 @@ void X86PassConfig::addPreEmitPass() {
575575
addPass(createX86FixupInstTuning());
576576
addPass(createX86FixupVectorConstants());
577577
}
578-
addPass(createX86EvexToVexInsts());
578+
addPass(createX86CompressEVEXPass());
579579
addPass(createX86DiscriminateMemOpsPass());
580580
addPass(createX86InsertPrefetchPass());
581581
addPass(createX86InsertX87waitPass());

llvm/test/CodeGen/X86/O0-pipeline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
; CHECK-NEXT: Implement the 'patchable-function' attribute
6969
; CHECK-NEXT: X86 Indirect Branch Tracking
7070
; CHECK-NEXT: X86 vzeroupper inserter
71-
; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possibl
71+
; CHECK-NEXT: Compressing EVEX instrs when possible
7272
; CHECK-NEXT: X86 Discriminate Memory Operands
7373
; CHECK-NEXT: X86 Insert Cache Prefetches
7474
; CHECK-NEXT: X86 insert wait instruction

llvm/test/CodeGen/X86/evex-to-vex-compress.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: llc -mtriple=x86_64-- -run-pass x86-evex-to-vex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
1+
# RUN: llc -mtriple=x86_64-- -run-pass x86-compress-evex -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
22
# This test verifies VEX encoding for AVX-512 instructions that use registers of low indexes and
33
# do not use zmm or mask registers and have a corresponding AVX/AVX2 opcode
44

llvm/test/CodeGen/X86/opt-pipeline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@
205205
; CHECK-NEXT: X86 LEA Fixup
206206
; CHECK-NEXT: X86 Fixup Inst Tuning
207207
; CHECK-NEXT: X86 Fixup Vector Constants
208-
; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible
208+
; CHECK-NEXT: Compressing EVEX instrs when possible
209209
; CHECK-NEXT: X86 Discriminate Memory Operands
210210
; CHECK-NEXT: X86 Insert Cache Prefetches
211211
; CHECK-NEXT: X86 insert wait instruction

llvm/utils/TableGen/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ add_tablegen(llvm-tblgen LLVM
8282
Types.cpp
8383
VarLenCodeEmitterGen.cpp
8484
X86DisassemblerTables.cpp
85-
X86EVEX2VEXTablesEmitter.cpp
85+
X86CompressEVEXTablesEmitter.cpp
8686
X86FoldTablesEmitter.cpp
8787
X86MnemonicTables.cpp
8888
X86ModRMFilters.cpp

0 commit comments

Comments
 (0)