- //===- X86EvexToVex.cpp ---------------------------------------------------===//
- // Compress EVEX instructions to VEX encoding when possible to reduce code size
+ //===- X86CompressEVEX.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
- /// \file
- /// This file defines the pass that goes over all AVX-512 instructions which
- /// are encoded using the EVEX prefix and if possible replaces them by their
- /// corresponding VEX encoding which is usually shorter by 2 bytes.
- /// EVEX instructions may be encoded via the VEX prefix when the AVX-512
- /// instruction has a corresponding AVX/AVX2 opcode, when vector length
- /// accessed by instruction is less than 512 bits and when it does not use
- //  the xmm or the mask registers or xmm/ymm registers with indexes higher
- //  than 15.
- /// The pass applies code reduction on the generated code for AVX-512 instrs.
+ // This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
+ // when possible in order to reduce code size or facilitate HW decoding.
//
+ // Possible compression:
+ //   a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
+ //   b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy)
+ //   c. NDD (EVEX) -> non-NDD (legacy)
+ //   d. NF_ND (EVEX) -> NF (EVEX)
+ //
+ // Compression a, b and c can always reduce code size, with some exceptions
+ // such as promoted 16-bit CRC32 which is as long as the legacy version.
+ //
+ // legacy:
+ //   crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
+ // promoted:
+ //   crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+ //
+ // From a performance perspective, these should be the same (same uops and
+ // same EXE ports). From an FMV perspective, an older legacy encoding is
+ // preferred b/c it can execute in more places (broader HW install base). So
+ // we will still do the compression.
+ //
+ // Compression d can help hardware decode (HW may skip reading the NDD
+ // register) although the instruction length remains unchanged.
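To make the size saving of compression a concrete, here is an illustrative pair of encodings (not from this commit; the bytes follow from the published VEX/EVEX formats): the 4-byte EVEX prefix collapses to a 2-byte VEX prefix, shrinking the instruction by 2 bytes.

EVEX: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
VEX:  vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc1]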
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"

using namespace llvm;

- // Including the generated EVEX2VEX tables.
- struct X86EvexToVexCompressTableEntry {
-   uint16_t EvexOpc;
-   uint16_t VexOpc;
+ // Including the generated EVEX compression tables.
+ struct X86CompressEVEXTableEntry {
+   uint16_t OldOpc;
+   uint16_t NewOpc;

-   bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
-     return EvexOpc < RHS.EvexOpc;
+   bool operator<(const X86CompressEVEXTableEntry &RHS) const {
+     return OldOpc < RHS.OldOpc;
  }

-   friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
-                         unsigned Opc) {
-     return TE.EvexOpc < Opc;
+   friend bool operator<(const X86CompressEVEXTableEntry &TE, unsigned Opc) {
+     return TE.OldOpc < Opc;
  }
};
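The two comparison operators serve different callers: the member form orders whole entries (used by the sorted-table assertions), while the heterogeneous friend form lets llvm::lower_bound probe the table with a bare opcode. A minimal usage sketch, assuming a table sorted by OldOpc (lookupCompressEntry is a hypothetical helper, not part of this patch):

  static const X86CompressEVEXTableEntry *
  lookupCompressEntry(ArrayRef<X86CompressEVEXTableEntry> Table, unsigned Opc) {
    // Binary search: operator<(entry, unsigned) compares each entry's OldOpc
    // against the raw opcode, so no temporary key entry is needed.
    const auto *I = llvm::lower_bound(Table, Opc);
    return (I != Table.end() && I->OldOpc == Opc) ? I : nullptr;
  }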
- #include "X86GenEVEX2VEXTables.inc"
+ #include "X86GenCompressEVEXTables.inc"

- #define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
- #define EVEX2VEX_NAME "x86-evex-to-vex-compress"
+ #define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
+ #define COMP_EVEX_NAME "x86-compress-evex"

- #define DEBUG_TYPE EVEX2VEX_NAME
+ #define DEBUG_TYPE COMP_EVEX_NAME

namespace {

- class EvexToVexInstPass : public MachineFunctionPass {
+ class CompressEVEXPass : public MachineFunctionPass {
public:
  static char ID;
-   EvexToVexInstPass() : MachineFunctionPass(ID) {}
-   StringRef getPassName() const override { return EVEX2VEX_DESC; }
+   CompressEVEXPass() : MachineFunctionPass(ID) {}
+   StringRef getPassName() const override { return COMP_EVEX_DESC; }

-   /// Loop over all of the basic blocks, replacing EVEX instructions
-   /// by equivalent VEX instructions when possible for reducing code size.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
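The member this comment introduces is collapsed out of the hunk below; the standard MachineFunctionPass spelling of that constraint (an assumption about the elided lines, shown only for context) is:

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }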
@@ -80,7 +93,7 @@ class EvexToVexInstPass : public MachineFunctionPass {

} // end anonymous namespace

- char EvexToVexInstPass::ID = 0;
+ char CompressEVEXPass::ID = 0;

static bool usesExtendedRegister(const MachineInstr &MI) {
  auto isHiRegIdx = [](unsigned Reg) {
@@ -112,8 +125,8 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
  return false;
}

- static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
-   switch (EvexOpc) {
+ static bool checkVEXInstPredicate(unsigned OldOpc, const X86Subtarget &ST) {
+   switch (OldOpc) {
  default:
    return true;
  case X86::VCVTNEPS2BF16Z128rm:
@@ -151,15 +164,15 @@ static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
}

// Do any custom cleanup needed to finalize the conversion.
- static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
-   (void)VexOpc;
+ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
+   (void)NewOpc;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case X86::VALIGNDZ128rri:
  case X86::VALIGNDZ128rmi:
  case X86::VALIGNQZ128rri:
  case X86::VALIGNQZ128rmi: {
-     assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
+     assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
           "Unexpected new opcode!");
    unsigned Scale =
        (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
@@ -175,8 +188,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI64X2Z256rmi:
  case X86::VSHUFI64X2Z256rri: {
-     assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
-             VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
+     assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
+             NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
           "Unexpected new opcode!");
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
@@ -200,7 +213,7 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
  case X86::VRNDSCALESDZm_Int:
  case X86::VRNDSCALESSZr_Int:
  case X86::VRNDSCALESSZm_Int:
-     const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+     const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Ensure that only bits 3:0 of the immediate are used.
    if ((ImmVal & 0xf) != ImmVal)
@@ -239,28 +252,28 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
    return false;

  // Use the VEX.L bit to select the 128 or 256-bit table.
-   ArrayRef<X86EvexToVexCompressTableEntry> Table =
+   ArrayRef<X86CompressEVEXTableEntry> Table =
      (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
                                    : ArrayRef(X86EvexToVex128CompressTable);

-   unsigned EvexOpc = MI.getOpcode();
-   const auto *I = llvm::lower_bound(Table, EvexOpc);
-   if (I == Table.end() || I->EvexOpc != EvexOpc)
+   unsigned Opc = MI.getOpcode();
+   const auto *I = llvm::lower_bound(Table, Opc);
+   if (I == Table.end() || I->OldOpc != Opc)
    return false;

  if (usesExtendedRegister(MI))
    return false;
-   if (!checkVEXInstPredicate(EvexOpc, ST))
+   if (!checkVEXInstPredicate(Opc, ST))
    return false;
-   if (!performCustomAdjustments(MI, I->VexOpc))
+   if (!performCustomAdjustments(MI, I->NewOpc))
    return false;

-   MI.setDesc(ST.getInstrInfo()->get(I->VexOpc));
+   MI.setDesc(ST.getInstrInfo()->get(I->NewOpc));
  MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
  return true;
}

- bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
+ bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
#ifndef NDEBUG
  // Make sure the tables are sorted.
  static std::atomic<bool> TableChecked(false);
@@ -289,8 +302,8 @@ bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
  return Changed;
}

- INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
+ INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false)

- FunctionPass *llvm::createX86EvexToVexInsts() {
-   return new EvexToVexInstPass();
+ FunctionPass *llvm::createX86CompressEVEXPass() {
+   return new CompressEVEXPass();
}
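For context on how the renamed factory is consumed (a sketch assuming the usual wiring in X86TargetMachine.cpp, which is not part of this diff):

  void X86PassConfig::addPreEmitPass() {
    // Run after register allocation so physical registers and final opcodes
    // are known when the compression tables are consulted.
    addPass(createX86CompressEVEXPass());
  }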