Skip to content

Commit 4ef587a

Browse files
committed
[bolt] simplify constant loads for X86 & AArch64
This patch fixes an issue with load-literal instructions on AArch64 (bolt/test/AArch64/materialize-constant.s): the address range of a literal load is limited to +/- 1MB, but emitCI places the constants at the end of the function, where they can end up outside that range. SimplifyRODataLoads is now enabled by default for X86 & AArch64. Signed-off-by: Moksyakov Alexey <[email protected]>
1 parent 2ba0529 commit 4ef587a

File tree

6 files changed

+201
-41
lines changed

6 files changed

+201
-41
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1878,6 +1878,13 @@ class MCPlusBuilder {
18781878
return {};
18791879
}
18801880

1881+
/// Build the instruction sequence that replaces \p Inst, a load of constant
/// data, with instructions that produce the loaded value directly.
/// \p ConstantData holds the contents of the section containing the constant
/// and \p Offset is the constant's offset within those contents. An empty
/// list tells the caller to leave \p Inst unchanged. Targets that support
/// this rewrite must override it; the default implementation is unreachable.
virtual InstructionListType materializeConstant(const MCInst &Inst,
1882+
StringRef ConstantData,
1883+
uint64_t Offset) const {
1884+
llvm_unreachable("not implemented");
1885+
return {};
1886+
}
1887+
18811888
/// Creates a new unconditional branch instruction in Inst and set its operand
18821889
/// to TBB.
18831890
virtual void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,7 +1187,8 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
11871187
uint64_t NumDynamicLocalLoadsFound = 0;
11881188

11891189
for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
1190-
for (MCInst &Inst : *BB) {
1190+
for (auto It = BB->begin(); It != BB->end(); ++It) {
1191+
const MCInst &Inst = *It;
11911192
unsigned Opcode = Inst.getOpcode();
11921193
const MCInstrDesc &Desc = BC.MII->get(Opcode);
11931194

@@ -1200,7 +1201,7 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12001201

12011202
if (MIB->hasPCRelOperand(Inst)) {
12021203
// Try to find the symbol that corresponds to the PC-relative operand.
1203-
MCOperand *DispOpI = MIB->getMemOperandDisp(Inst);
1204+
MCOperand *DispOpI = MIB->getMemOperandDisp(const_cast<MCInst &>(Inst));
12041205
assert(DispOpI != Inst.end() && "expected PC-relative displacement");
12051206
assert(DispOpI->isExpr() &&
12061207
"found PC-relative with non-symbolic displacement");
@@ -1226,28 +1227,53 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12261227
}
12271228

12281229
// Get the contents of the section containing the target address of the
1229-
// memory operand. We are only interested in read-only sections.
1230+
// memory operand. We are only interested in read-only sections for X86,
1231+
// for aarch64 the sections can be read-only or executable.
12301232
ErrorOr<BinarySection &> DataSection =
12311233
BC.getSectionForAddress(TargetAddress);
1232-
if (!DataSection || DataSection->isWritable())
1234+
if (!DataSection)
12331235
continue;
12341236

1237+
if (BC.isX86() && DataSection->isWritable())
1238+
continue;
1239+
1240+
if (DataSection->isText()) {
1241+
// if data is not part of a function, check if it is part of a global CI
1242+
// do not proceed if there aren't data markers for CIs
1243+
BinaryFunction *BFTgt =
1244+
BC.getBinaryFunctionContainingAddress(TargetAddress,
1245+
/*CheckPastEnd*/ false,
1246+
/*UseMaxSize*/ true);
1247+
const bool IsInsideFunc =
1248+
BFTgt && BFTgt->isInConstantIsland(TargetAddress);
1249+
1250+
auto CIEndIter = BC.AddressToConstantIslandMap.end();
1251+
auto CIIter = BC.AddressToConstantIslandMap.find(TargetAddress);
1252+
if (!IsInsideFunc && CIIter == CIEndIter)
1253+
continue;
1254+
}
1255+
12351256
if (BC.getRelocationAt(TargetAddress) ||
12361257
BC.getDynamicRelocationAt(TargetAddress))
12371258
continue;
12381259

1239-
uint32_t Offset = TargetAddress - DataSection->getAddress();
1240-
StringRef ConstantData = DataSection->getContents();
1241-
12421260
++NumLocalLoadsFound;
12431261
if (BB->hasProfile())
12441262
NumDynamicLocalLoadsFound += BB->getExecutionCount();
12451263

1246-
if (MIB->replaceMemOperandWithImm(Inst, ConstantData, Offset)) {
1247-
++NumLocalLoadsSimplified;
1248-
if (BB->hasProfile())
1249-
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
1250-
}
1264+
uint32_t Offset = TargetAddress - DataSection->getAddress();
1265+
StringRef ConstantData = DataSection->getContents();
1266+
const InstructionListType Instrs =
1267+
MIB->materializeConstant(Inst, ConstantData, Offset);
1268+
if (Instrs.empty())
1269+
continue;
1270+
1271+
auto IIter = BB->findInstruction(&Inst);
1272+
It = BB->replaceInstruction(IIter, Instrs);
1273+
1274+
++NumLocalLoadsSimplified;
1275+
if (BB->hasProfile())
1276+
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
12511277
}
12521278
}
12531279

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ static cl::opt<bool> SimplifyRODataLoads(
236236
"simplify-rodata-loads",
237237
cl::desc("simplify loads from read-only sections by replacing the memory "
238238
"operand with the constant found in the corresponding section"),
239-
cl::cat(BoltOptCategory));
239+
cl::init(true), cl::cat(BoltOptCategory));
240240

241241
static cl::list<std::string>
242242
SpecializeMemcpy1("memcpy1-spec",

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2797,6 +2797,56 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
27972797
return Insts;
27982798
}
27992799

2800+
/// Replace a PC-relative literal load with a sequence of MOVK instructions
/// that builds the loaded value directly in the destination register.
///
/// \p Inst is the load to replace; only LDRWl (4 bytes) and LDRXl (8 bytes)
/// are handled. \p ConstantData is the contents of the section holding the
/// literal and \p Offset is the literal's offset within those contents.
///
/// Returns one MOVK per 16-bit chunk of the constant, most significant chunk
/// first, so every bit of the destination register is overwritten. Returns
/// an empty list when \p Inst is not a supported load or the constant does
/// not lie entirely inside \p ConstantData; the caller then keeps \p Inst.
InstructionListType materializeConstant(const MCInst &Inst,
                                        StringRef ConstantData,
                                        uint64_t Offset) const override {
  // Size in bytes that Inst loads from memory.
  uint8_t DataSize;
  // Opcode used to write each 16-bit chunk of the constant.
  unsigned MovOpcode;
  switch (Inst.getOpcode()) {
  case AArch64::LDRWl:
    DataSize = 4;
    MovOpcode = AArch64::MOVKWi;
    break;
  case AArch64::LDRXl:
    DataSize = 8;
    MovOpcode = AArch64::MOVKXi;
    break;
  default:
    // Any other load cannot be rebuilt with MOVK here; report "nothing to
    // replace" instead of aborting so the caller simply skips it.
    return {};
  }

  // The whole constant must be available. Testing Offset first keeps the
  // unsigned subtraction below from wrapping around.
  if (Offset > ConstantData.size() || ConstantData.size() - Offset < DataSize)
    return {};

  const uint64_t ImmVal =
      DataExtractor(ConstantData, /*IsLittleEndian=*/true, /*AddressSize=*/8)
          .getUnsigned(&Offset, DataSize);

  const MCPhysReg Reg = Inst.getOperand(0).getReg();
  // One MOVK per 16-bit chunk: 2 for a W register, 4 for an X register.
  const unsigned NumChunks = DataSize / 2;

  InstructionListType Insts;
  Insts.reserve(NumChunks);
  for (unsigned Chunk = NumChunks; Chunk-- > 0;) {
    const unsigned Shift = Chunk * 16;
    Insts.emplace_back();
    MCInst &Mov = Insts.back();
    Mov.setOpcode(MovOpcode);
    // MOVK takes the destination register as both output and input.
    Mov.addOperand(MCOperand::createReg(Reg));
    Mov.addOperand(MCOperand::createReg(Reg));
    Mov.addOperand(MCOperand::createImm((ImmVal >> Shift) & 0xFFFF));
    Mov.addOperand(MCOperand::createImm(Shift));
  }

  return Insts;
}
2849+
28002850
std::optional<Relocation>
28012851
createRelocation(const MCFixup &Fixup,
28022852
const MCAsmBackend &MAB) const override {

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1477,6 +1477,24 @@ class X86MCPlusBuilder : public MCPlusBuilder {
14771477
return true;
14781478
}
14791479

1480+
/// Fold the constant at \p Offset in \p ConstantData into \p Inst.
///
/// The folding is attempted on a scratch copy via replaceMemOperandWithImm,
/// so \p Inst itself is never modified. On success the result holds a single
/// instruction carrying the folded opcode and operands; on failure the
/// result is empty.
InstructionListType materializeConstant(const MCInst &Inst,
                                        StringRef ConstantData,
                                        uint64_t Offset) const override {
  MCInst Folded = Inst;
  if (!replaceMemOperandWithImm(Folded, ConstantData, Offset))
    return {};

  // Rebuild the instruction from the folded opcode and operands only.
  MCInst Replacement;
  Replacement.setOpcode(Folded.getOpcode());
  for (const MCOperand &Operand : Folded)
    Replacement.addOperand(Operand);

  InstructionListType Result;
  Result.push_back(Replacement);
  return Result;
}
1497+
14801498
/// TODO: this implementation currently works for the most common opcodes that
14811499
/// load from memory. It can be extended to work with memory store opcodes as
14821500
/// well as more memory load opcodes.
Lines changed: 87 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,67 @@
1-
// this test checks a load literal instructions changed to movk
1+
// Test checks that load literal instructions are changed to movk
2+
// 1) case for big function where CI moved to the end of the function
3+
// 2) case for CI outside of the function
24

3-
// REQUIRES: system-linux
5+
# REQUIRES: system-linux
46

5-
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
7+
# RUN: rm -rf %t && split-file %s %t
68

7-
# RUN: link_fdata %s %t.o %t.fdata
8-
# RUN: %clang %cflags -pie %t.o -o %t.exe -Wl,-q -Wl,-z,relro -Wl,-z,now
9-
# RUN: llvm-bolt %t.exe -o %t.bolt -data %t.fdata \
10-
# RUN: --keep-nops --eliminate-unreachable=false
11-
# RUN: llvm-objdump --disassemble-symbols=foo %t.bolt | FileCheck %s
9+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
10+
# RUN: %t/materialize-ci-big-func.s -o %t/materialize-ci-big-func.o
11+
# RUN: %clang %cflags %t/materialize-ci-big-func.o -Wl,-q \
12+
# RUN: -o %t/materialize-ci-big-func.exe
13+
# RUN: llvm-bolt %t/materialize-ci-big-func.exe \
14+
# RUN: -o %t/materialize-ci-big-func.bolt --lite=0 \
15+
# RUN: --keep-nops --eliminate-unreachable=false \
16+
# RUN: | FileCheck %s --check-prefix=CHECK-BIG-FUNC
1217

13-
# CHECK: mov{{.*}} w19, #0
14-
# CHECK-NEXT: mov{{.*}} w22, #0
15-
# CHECK-NEXT: movk{{.*}} w23, #0, lsl #16
16-
# CHECK-NEXT: movk{{.*}} w23, #100
17-
# CHECK-NEXT: movk{{.*}} w24, #0, lsl #16
18-
# CHECK-NEXT: movk{{.*}} w24, #3
18+
# CHECK-BIG-FUNC: simplified 2 out of 2 loads from a statically computed address
1919

20+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
21+
# RUN: %t/materialize-ci-outside-func.s -o %t/materialize-ci-outside-func.o
22+
# RUN: %clang %cflags %t/materialize-ci-outside-func.o -Wl,-q \
23+
# RUN: -o %t/materialize-ci-outside-func.exe
24+
# RUN: llvm-bolt %t/materialize-ci-outside-func.exe \
25+
# RUN: -o %t/materialize-ci-outside-func.bolt --lite=0 \
26+
# RUN: | FileCheck %s --check-prefix=CHECK-OUTSIDE-FUNC
27+
28+
# CHECK-OUTSIDE-FUNC: simplified 2 out of 2 loads from a statically computed address
29+
30+
#--- materialize-ci-big-func.s
2031
.text
2132
.align 4
22-
.local foo
33+
.global foo
2334
.type foo, %function
2435
foo:
25-
# FDATA: 1 main 0 1 foo 0 0 10
2636
stp x29, x30, [sp, #-32]!
2737
stp x19, x20, [sp, #16]
2838
mov x29, sp
2939

30-
mov w19, #0 // counter = 0
31-
mov w22, #0 // result = 0
40+
mov w19, #0
41+
mov w22, #0
3242

3343
ldr w23, .Llimit
34-
ldr w24, .LStep
44+
ldr x24, .LStep
3545
b .LStub
3646

47+
// CI moved by emitCI function to the end of the function
48+
// without materialization CI is outside available range (+/-1MB)
3749
.LConstants:
3850
.Llimit: .word 100
39-
.LStep: .word 3
51+
.LStep: .xword 3
4052

4153
.LStub:
54+
55+
56+
57+
4258
.rep 0x100000
4359
nop
4460
.endr
4561
b .Lmain_loop
4662

4763
.Lmain_loop:
48-
madd w22, w19, w24, w22 // result += counter * increment
64+
madd w22, w19, w24, w22
4965

5066
add w19, w19, #1
5167
cmp w19, w23
@@ -62,13 +78,56 @@ foo:
6278
.size foo, .-foo
6379

6480

65-
.global main
66-
.type main, %function
67-
main:
81+
.global _start
82+
.type _start, %function
83+
_start:
84+
mov x0, #0
85+
bl foo
86+
mov x0, 0
87+
mov w8, #93
88+
svc #0
89+
90+
.size _start, .-_start
91+
92+
#--- materialize-ci-outside-func.s
93+
.text
94+
.align 4
95+
.global foo
96+
.type foo, %function
97+
foo:
98+
stp x29, x30, [sp, #-32]!
99+
stp x19, x20, [sp, #16]
100+
mov x29, sp
101+
102+
mov w19, #0
103+
mov w22, #0
104+
105+
ldr w23, .Llimit
106+
ldr x24, .LStep
107+
108+
.Lmain_loop:
109+
madd w22, w19, w24, w22
110+
add w19, w19, #1
111+
cmp w19, w23
112+
b.lt .Lmain_loop
113+
mov w0, w22
114+
.Lreturn_point:
115+
ldp x19, x20, [sp, #16]
116+
ldp x29, x30, [sp], #32
117+
ret
118+
.size foo, .-foo
119+
120+
.LConstants:
121+
.Llimit: .word 100
122+
.LStep: .xword 3
123+
124+
.global _start
125+
.type _start, %function
126+
_start:
68127
mov x0, #0
69128
bl foo
70-
mov x0, 0
71-
mov w8, #93
72-
svc #0
129+
mov x0, 0
130+
mov w8, #93
131+
svc #0
73132

74-
.size main, .-main
133+
.size _start, .-_start

0 commit comments

Comments
 (0)