Skip to content

Commit 95e84b2

Browse files
committed
[bolt] simplify constant loads for X86 & AArch64
This patch fixed the issue related to load literal for AArch64 (bolt/test/AArch64/materialize-constant.s), address range for literal is limited +/- 1MB, emitCI puts the constants by the end of function and the one is out of available range. SimplifyRODataLoads is enabled by default for X86 & AArch64 Signed-off-by: Moksyakov Alexey <[email protected]>
1 parent 85cc465 commit 95e84b2

File tree

6 files changed

+183
-33
lines changed

6 files changed

+183
-33
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,6 +1872,13 @@ class MCPlusBuilder {
18721872
return {};
18731873
}
18741874

1875+
virtual InstructionListType materializeConstant(const MCInst &Inst,
1876+
StringRef ConstantData,
1877+
uint64_t Offset) const {
1878+
llvm_unreachable("not implemented");
1879+
return {};
1880+
}
1881+
18751882
/// Creates a new unconditional branch instruction in Inst and set its operand
18761883
/// to TBB.
18771884
virtual void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,7 +1187,8 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
11871187
uint64_t NumDynamicLocalLoadsFound = 0;
11881188

11891189
for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
1190-
for (MCInst &Inst : *BB) {
1190+
for (auto It = BB->begin(); It != BB->end(); ++It) {
1191+
const MCInst &Inst = *It;
11911192
unsigned Opcode = Inst.getOpcode();
11921193
const MCInstrDesc &Desc = BC.MII->get(Opcode);
11931194

@@ -1200,7 +1201,7 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12001201

12011202
if (MIB->hasPCRelOperand(Inst)) {
12021203
// Try to find the symbol that corresponds to the PC-relative operand.
1203-
MCOperand *DispOpI = MIB->getMemOperandDisp(Inst);
1204+
MCOperand *DispOpI = MIB->getMemOperandDisp(const_cast<MCInst&>(Inst));
12041205
assert(DispOpI != Inst.end() && "expected PC-relative displacement");
12051206
assert(DispOpI->isExpr() &&
12061207
"found PC-relative with non-symbolic displacement");
@@ -1226,28 +1227,53 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12261227
}
12271228

12281229
// Get the contents of the section containing the target address of the
1229-
// memory operand. We are only interested in read-only sections.
1230+
// memory operand. We are only interested in read-only sections for X86,
1231+
// for aarch64 the sections can be read-only or executable.
12301232
ErrorOr<BinarySection &> DataSection =
12311233
BC.getSectionForAddress(TargetAddress);
1232-
if (!DataSection || DataSection->isWritable())
1234+
if (!DataSection)
12331235
continue;
12341236

1237+
if (BC.isX86() && DataSection->isWritable())
1238+
continue;
1239+
1240+
if (DataSection->isText()) {
1241+
// if data is not part of a function, check if it is part of a global CI
1242+
// do not proceed if there aren't data markers for CIs
1243+
BinaryFunction *BFTgt =
1244+
BC.getBinaryFunctionContainingAddress(TargetAddress,
1245+
/*CheckPastEnd*/ false,
1246+
/*UseMaxSize*/ true);
1247+
const bool IsInsideFunc =
1248+
BFTgt && BFTgt->isInConstantIsland(TargetAddress);
1249+
1250+
auto CIEndIter = BC.AddressToConstantIslandMap.end();
1251+
auto CIIter = BC.AddressToConstantIslandMap.find(TargetAddress);
1252+
if (!IsInsideFunc && CIIter == CIEndIter)
1253+
continue;
1254+
}
1255+
12351256
if (BC.getRelocationAt(TargetAddress) ||
12361257
BC.getDynamicRelocationAt(TargetAddress))
12371258
continue;
12381259

1239-
uint32_t Offset = TargetAddress - DataSection->getAddress();
1240-
StringRef ConstantData = DataSection->getContents();
1241-
12421260
++NumLocalLoadsFound;
12431261
if (BB->hasProfile())
12441262
NumDynamicLocalLoadsFound += BB->getExecutionCount();
12451263

1246-
if (MIB->replaceMemOperandWithImm(Inst, ConstantData, Offset)) {
1247-
++NumLocalLoadsSimplified;
1248-
if (BB->hasProfile())
1249-
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
1250-
}
1264+
uint32_t Offset = TargetAddress - DataSection->getAddress();
1265+
StringRef ConstantData = DataSection->getContents();
1266+
const InstructionListType Instrs =
1267+
MIB->materializeConstant(Inst, ConstantData, Offset);
1268+
if (Instrs.empty())
1269+
continue;
1270+
1271+
auto IIter = BB->findInstruction(&Inst);
1272+
It = BB->replaceInstruction(IIter, Instrs);
1273+
1274+
++NumLocalLoadsSimplified;
1275+
if (BB->hasProfile())
1276+
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
12511277
}
12521278
}
12531279

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ static cl::opt<bool> SimplifyRODataLoads(
236236
"simplify-rodata-loads",
237237
cl::desc("simplify loads from read-only sections by replacing the memory "
238238
"operand with the constant found in the corresponding section"),
239-
cl::cat(BoltOptCategory));
239+
cl::init(true), cl::cat(BoltOptCategory));
240240

241241
static cl::list<std::string>
242242
SpecializeMemcpy1("memcpy1-spec",

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2770,6 +2770,51 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
27702770
return Insts;
27712771
}
27722772

2773+
InstructionListType materializeConstant(const MCInst &Inst,
2774+
StringRef ConstantData,
2775+
uint64_t Offset) const override {
2776+
struct InstInfo {
2777+
// Size in bytes that Inst loads from memory.
2778+
uint8_t DataSize;
2779+
// number instructions needed to materialize the constant.
2780+
uint8_t numInstrs;
2781+
// Opcode to use for materializing the constant.
2782+
unsigned Opcode;
2783+
};
2784+
2785+
InstInfo I;
2786+
InstructionListType Insts(0);
2787+
switch (Inst.getOpcode()) {
2788+
case AArch64::LDRWl: I = {4, 2, AArch64::MOVKWi}; break;
2789+
case AArch64::LDRXl: I = {8, 4, AArch64::MOVKXi}; break;
2790+
default:
2791+
llvm_unreachable("unexpected ldr instruction");
2792+
break;
2793+
}
2794+
2795+
const uint64_t ConstantSize = ConstantData.size() - Offset > I.DataSize
2796+
? I.DataSize
2797+
: ConstantData.size() - Offset;
2798+
if (ConstantSize != I.DataSize)
2799+
return Insts;
2800+
2801+
const uint64_t ImmVal = DataExtractor(ConstantData, true, 8).getUnsigned(&Offset, I.DataSize);
2802+
2803+
Insts.resize(I.numInstrs);
2804+
unsigned shift = (Insts.size() - 1) * 16;
2805+
MCPhysReg Reg = Inst.getOperand(0).getReg();
2806+
for (unsigned i = 0; i < Insts.size(); i++, shift -= 16) {
2807+
Insts[i].setOpcode(I.Opcode);
2808+
Insts[i].clear();
2809+
Insts[i].addOperand(MCOperand::createReg(Reg));
2810+
Insts[i].addOperand(MCOperand::createReg(Reg));
2811+
Insts[i].addOperand(MCOperand::createImm((ImmVal >> shift) & 0xFFFF));
2812+
Insts[i].addOperand(MCOperand::createImm(shift));
2813+
}
2814+
2815+
return Insts;
2816+
}
2817+
27732818
std::optional<Relocation>
27742819
createRelocation(const MCFixup &Fixup,
27752820
const MCAsmBackend &MAB) const override {

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1477,6 +1477,25 @@ class X86MCPlusBuilder : public MCPlusBuilder {
14771477
return true;
14781478
}
14791479

1480+
InstructionListType materializeConstant(const MCInst &Inst,
1481+
StringRef ConstantData,
1482+
uint64_t Offset) const override {
1483+
InstructionListType Instrs;
1484+
MCInst InstCopy = Inst;
1485+
1486+
if (!replaceMemOperandWithImm(InstCopy, ConstantData, Offset))
1487+
return InstructionListType{};
1488+
1489+
Instrs.emplace_back();
1490+
Instrs.back().setOpcode(InstCopy.getOpcode());
1491+
Instrs.back().clear();
1492+
for (unsigned i = 0; i < InstCopy.getNumOperands(); ++i)
1493+
Instrs.back().addOperand(InstCopy.getOperand(i));
1494+
1495+
return Instrs;
1496+
}
1497+
1498+
14801499
/// TODO: this implementation currently works for the most common opcodes that
14811500
/// load from memory. It can be extended to work with memory store opcodes as
14821501
/// well as more memory load opcodes.

bolt/test/AArch64/materialize-constant.s

Lines changed: 73 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,36 @@
11
// this test checks a load literal instructions changed to movk
22

3-
// REQUIRES: system-linux
3+
# REQUIRES: system-linux
44

5-
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
5+
# RUN: rm -rf %t && split-file %s %t
66

7-
# RUN: link_fdata %s %t.o %t.fdata
8-
# RUN: %clang %cflags -pie %t.o -o %t.exe -Wl,-q -Wl,-z,relro -Wl,-z,now
9-
# RUN: llvm-bolt %t.exe -o %t.bolt -data %t.fdata \
10-
# RUN: --keep-nops --eliminate-unreachable=false
11-
# RUN: llvm-objdump --disassemble-symbols=foo %t.bolt | FileCheck %s
7+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
8+
# RUN: %t/materialized-ci-big-func.s -o %t/materialized-ci-big-func.o
9+
# RUN: %clang %t/materialized-ci-big-func.o -Wl,-q \
10+
# RUN: -o %t/materialized-ci-big-func.exe
11+
# RUN: llvm-bolt %t/materialized-ci-big-func.exe \
12+
# RUN: -o %t/materialized-ci-big-func.bolt --lite=0 \
13+
# RUN: --keep-nops --eliminate-unreachable=false \
14+
# RUN: | FileCheck %s --check-prefix=CHECK-BIG-FUNC
1215

13-
# CHECK: mov{{.*}} w19, #0
14-
# CHECK-NEXT: mov{{.*}} w22, #0
15-
# CHECK-NEXT: movk{{.*}} w23, #0, lsl #16
16-
# CHECK-NEXT: movk{{.*}} w23, #100
17-
# CHECK-NEXT: movk{{.*}} w24, #0, lsl #16
18-
# CHECK-NEXT: movk{{.*}} w24, #3
16+
# CHECK-BIG-FUNC: simplified 2 out of 2 loads from a statically computed address
1917

18+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
19+
# RUN: %t/materialize-ci-outside-func.s -o %t/materialize-ci-outside-func.o
20+
# RUN: %clang %t/materialize-ci-outside-func.o -Wl,-q \
21+
# RUN: -o %t/materialize-ci-outside-func.exe
22+
# RUN: llvm-bolt %t/materialize-ci-outside-func.exe \
23+
# RUN: -o %t/materialize-ci-outside-func.bolt --lite=0 \
24+
# RUN: | FileCheck %s --check-prefix=CHECK-OUTSIDE-FUNC
25+
26+
# CHECK-OUTSIDE-FUNC: simplified 2 out of 2 loads from a statically computed address
27+
28+
#--- materialized-ci-big-func.s
2029
.text
2130
.align 4
2231
.local foo
2332
.type foo, %function
2433
foo:
25-
# FDATA: 1 main 0 1 foo 0 0 10
2634
stp x29, x30, [sp, #-32]!
2735
stp x19, x20, [sp, #16]
2836
mov x29, sp
@@ -31,12 +39,12 @@ foo:
3139
mov w22, #0 // result = 0
3240

3341
ldr w23, .Llimit
34-
ldr w24, .LStep
42+
ldr x24, .LStep
3543
b .LStub
3644

3745
.LConstants:
3846
.Llimit: .word 100
39-
.LStep: .word 3
47+
.LStep: .xword 3
4048

4149
.LStub:
4250
.rep 0x100000
@@ -67,8 +75,53 @@ foo:
6775
main:
6876
mov x0, #0
6977
bl foo
70-
mov x0, 0
71-
mov w8, #93
72-
svc #0
78+
mov x0, 0
79+
mov w8, #93
80+
svc #0
81+
82+
.size main, .-main
83+
84+
#--- materialize-ci-outside-func.s
85+
// check that constants in .text section but outside functions
86+
// are materialized correctly
87+
.text
88+
.align 4
89+
.local foo
90+
.type foo, %function
91+
foo:
92+
stp x29, x30, [sp, #-32]!
93+
stp x19, x20, [sp, #16]
94+
mov x29, sp
95+
96+
mov w19, #0 // counter = 0
97+
mov w22, #0 // result = 0
98+
99+
ldr w23, .Llimit
100+
ldr x24, .LStep
101+
102+
.Lmain_loop:
103+
madd w22, w19, w24, w22
104+
add w19, w19, #1
105+
cmp w19, w23
106+
b.lt .Lmain_loop
107+
mov w0, w22
108+
.Lreturn_point:
109+
ldp x19, x20, [sp, #16]
110+
ldp x29, x30, [sp], #32
111+
ret
112+
.size foo, .-foo
113+
114+
.LConstants:
115+
.Llimit: .word 100
116+
.LStep: .xword 3
117+
118+
.global main
119+
.type main, %function
120+
main:
121+
mov x0, #0
122+
bl foo
123+
mov x0, 0
124+
mov w8, #93
125+
svc #0
73126

74-
.size main, .-main
127+
.size main, .-main

0 commit comments

Comments
 (0)