Skip to content

Commit e96818d

Browse files
yavtukyavtuk
authored andcommitted
[bolt] simplify constant loads for X86 & AArch64
This patch fixed the issue related to load literal for AArch64 (bolt/test/AArch64/materialize-constant.s), address range for literal is limited +/- 1MB, emitCI puts the constants by the end of function and the one is out of available range. SimplifyRODataLoads is enabled by default for X86 & AArch64 Signed-off-by: Moksyakov Alexey <[email protected]>
1 parent 85cc465 commit e96818d

File tree

6 files changed

+155
-41
lines changed

6 files changed

+155
-41
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,6 +1872,13 @@ class MCPlusBuilder {
18721872
return {};
18731873
}
18741874

1875+
virtual InstructionListType materializeConstant(const MCInst &Inst,
1876+
StringRef ConstantData,
1877+
uint64_t Offset) const {
1878+
llvm_unreachable("not implemented");
1879+
return {};
1880+
}
1881+
18751882
/// Creates a new unconditional branch instruction in Inst and set its operand
18761883
/// to TBB.
18771884
virtual void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,7 +1187,8 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
11871187
uint64_t NumDynamicLocalLoadsFound = 0;
11881188

11891189
for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
1190-
for (MCInst &Inst : *BB) {
1190+
for (auto It = BB->begin(); It != BB->end(); ++It) {
1191+
const MCInst &Inst = *It;
11911192
unsigned Opcode = Inst.getOpcode();
11921193
const MCInstrDesc &Desc = BC.MII->get(Opcode);
11931194

@@ -1200,7 +1201,7 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12001201

12011202
if (MIB->hasPCRelOperand(Inst)) {
12021203
// Try to find the symbol that corresponds to the PC-relative operand.
1203-
MCOperand *DispOpI = MIB->getMemOperandDisp(Inst);
1204+
MCOperand *DispOpI = MIB->getMemOperandDisp(const_cast<MCInst &>(Inst));
12041205
assert(DispOpI != Inst.end() && "expected PC-relative displacement");
12051206
assert(DispOpI->isExpr() &&
12061207
"found PC-relative with non-symbolic displacement");
@@ -1226,28 +1227,53 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12261227
}
12271228

12281229
// Get the contents of the section containing the target address of the
1229-
// memory operand. We are only interested in read-only sections.
1230+
// memory operand. We are only interested in read-only sections for X86,
1231+
// for aarch64 the sections can be read-only or executable.
12301232
ErrorOr<BinarySection &> DataSection =
12311233
BC.getSectionForAddress(TargetAddress);
1232-
if (!DataSection || DataSection->isWritable())
1234+
if (!DataSection)
12331235
continue;
12341236

1237+
if (BC.isX86() && DataSection->isWritable())
1238+
continue;
1239+
1240+
if (DataSection->isText()) {
1241+
// If data is not part of a function, check if it is part of a global CI
1242+
// Do not proceed if there aren't data markers for CIs
1243+
BinaryFunction *BFTgt =
1244+
BC.getBinaryFunctionContainingAddress(TargetAddress,
1245+
/*CheckPastEnd*/ false,
1246+
/*UseMaxSize*/ true);
1247+
const bool IsInsideFunc =
1248+
BFTgt && BFTgt->isInConstantIsland(TargetAddress);
1249+
1250+
auto CIEndIter = BC.AddressToConstantIslandMap.end();
1251+
auto CIIter = BC.AddressToConstantIslandMap.find(TargetAddress);
1252+
if (!IsInsideFunc && CIIter == CIEndIter)
1253+
continue;
1254+
}
1255+
12351256
if (BC.getRelocationAt(TargetAddress) ||
12361257
BC.getDynamicRelocationAt(TargetAddress))
12371258
continue;
12381259

1239-
uint32_t Offset = TargetAddress - DataSection->getAddress();
1240-
StringRef ConstantData = DataSection->getContents();
1241-
12421260
++NumLocalLoadsFound;
12431261
if (BB->hasProfile())
12441262
NumDynamicLocalLoadsFound += BB->getExecutionCount();
12451263

1246-
if (MIB->replaceMemOperandWithImm(Inst, ConstantData, Offset)) {
1247-
++NumLocalLoadsSimplified;
1248-
if (BB->hasProfile())
1249-
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
1250-
}
1264+
uint32_t Offset = TargetAddress - DataSection->getAddress();
1265+
StringRef ConstantData = DataSection->getContents();
1266+
const InstructionListType Instrs =
1267+
MIB->materializeConstant(Inst, ConstantData, Offset);
1268+
if (Instrs.empty())
1269+
continue;
1270+
1271+
auto IIter = BB->findInstruction(&Inst);
1272+
It = std::next(BB->replaceInstruction(IIter, Instrs), Instrs.size());
1273+
1274+
++NumLocalLoadsSimplified;
1275+
if (BB->hasProfile())
1276+
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
12511277
}
12521278
}
12531279

@@ -1260,6 +1286,9 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12601286
}
12611287

12621288
Error SimplifyRODataLoads::runOnFunctions(BinaryContext &BC) {
1289+
if (BC.isRISCV())
1290+
return Error::success();
1291+
12631292
for (auto &It : BC.getBinaryFunctions()) {
12641293
BinaryFunction &Function = It.second;
12651294
if (shouldOptimize(Function) && simplifyRODataLoads(Function))

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ static cl::opt<bool> SimplifyRODataLoads(
236236
"simplify-rodata-loads",
237237
cl::desc("simplify loads from read-only sections by replacing the memory "
238238
"operand with the constant found in the corresponding section"),
239-
cl::cat(BoltOptCategory));
239+
cl::init(true), cl::cat(BoltOptCategory));
240240

241241
static cl::list<std::string>
242242
SpecializeMemcpy1("memcpy1-spec",

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2770,6 +2770,53 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
27702770
return Insts;
27712771
}
27722772

2773+
InstructionListType materializeConstant(const MCInst &Inst,
2774+
StringRef ConstantData,
2775+
uint64_t Offset) const override {
2776+
struct InstInfo {
2777+
// Size in bytes that Inst loads from memory.
2778+
uint8_t DataSize;
2779+
// Number of instructions needed to materialize the constant.
2780+
uint8_t numInstrs;
2781+
// Opcode to use for materializing the constant.
2782+
unsigned Opcode;
2783+
};
2784+
2785+
InstInfo I;
2786+
InstructionListType Insts(0);
2787+
switch (Inst.getOpcode()) {
2788+
case AArch64::LDRWl:
2789+
I = {4, 2, AArch64::MOVKWi};
2790+
break;
2791+
case AArch64::LDRXl:
2792+
I = {8, 4, AArch64::MOVKXi};
2793+
break;
2794+
default:
2795+
llvm_unreachable("unexpected ldr instruction");
2796+
break;
2797+
}
2798+
2799+
if (ConstantData.size() - Offset < I.DataSize)
2800+
return Insts;
2801+
2802+
const uint64_t ImmVal =
2803+
DataExtractor(ConstantData, true, 8).getUnsigned(&Offset, I.DataSize);
2804+
2805+
Insts.resize(I.numInstrs);
2806+
unsigned shift = (Insts.size() - 1) * 16;
2807+
MCPhysReg Reg = Inst.getOperand(0).getReg();
2808+
for (unsigned i = 0; i < Insts.size(); i++, shift -= 16) {
2809+
Insts[i].setOpcode(I.Opcode);
2810+
Insts[i].clear();
2811+
Insts[i].addOperand(MCOperand::createReg(Reg));
2812+
Insts[i].addOperand(MCOperand::createReg(Reg));
2813+
Insts[i].addOperand(MCOperand::createImm((ImmVal >> shift) & 0xFFFF));
2814+
Insts[i].addOperand(MCOperand::createImm(shift));
2815+
}
2816+
2817+
return Insts;
2818+
}
2819+
27732820
std::optional<Relocation>
27742821
createRelocation(const MCFixup &Fixup,
27752822
const MCAsmBackend &MAB) const override {

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1477,6 +1477,24 @@ class X86MCPlusBuilder : public MCPlusBuilder {
14771477
return true;
14781478
}
14791479

1480+
InstructionListType materializeConstant(const MCInst &Inst,
1481+
StringRef ConstantData,
1482+
uint64_t Offset) const override {
1483+
InstructionListType Instrs;
1484+
MCInst InstCopy = Inst;
1485+
1486+
if (!replaceMemOperandWithImm(InstCopy, ConstantData, Offset))
1487+
return InstructionListType{};
1488+
1489+
Instrs.emplace_back();
1490+
Instrs.back().setOpcode(InstCopy.getOpcode());
1491+
Instrs.back().clear();
1492+
for (unsigned i = 0; i < InstCopy.getNumOperands(); ++i)
1493+
Instrs.back().addOperand(InstCopy.getOperand(i));
1494+
1495+
return Instrs;
1496+
}
1497+
14801498
/// TODO: this implementation currently works for the most common opcodes that
14811499
/// load from memory. It can be extended to work with memory store opcodes as
14821500
/// well as more memory load opcodes.

bolt/test/AArch64/materialize-constant.s

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,37 @@
11
// this test checks a load literal instructions changed to movk
22

3-
// REQUIRES: system-linux
4-
5-
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
6-
7-
# RUN: link_fdata %s %t.o %t.fdata
8-
# RUN: %clang %cflags -pie %t.o -o %t.exe -Wl,-q -Wl,-z,relro -Wl,-z,now
9-
# RUN: llvm-bolt %t.exe -o %t.bolt -data %t.fdata \
10-
# RUN: --keep-nops --eliminate-unreachable=false
11-
# RUN: llvm-objdump --disassemble-symbols=foo %t.bolt | FileCheck %s
12-
13-
# CHECK: mov{{.*}} w19, #0
14-
# CHECK-NEXT: mov{{.*}} w22, #0
15-
# CHECK-NEXT: movk{{.*}} w23, #0, lsl #16
16-
# CHECK-NEXT: movk{{.*}} w23, #100
17-
# CHECK-NEXT: movk{{.*}} w24, #0, lsl #16
18-
# CHECK-NEXT: movk{{.*}} w24, #3
19-
3+
# REQUIRES: system-linux
4+
5+
# RUN: rm -rf %t && split-file %s %t
6+
7+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
8+
# RUN: -DCIBIGFUNC \
9+
# RUN: %t/materialized-ci.s -o %t/materialized-ci.o
10+
# RUN: %clang %t/materialized-ci.o -Wl,-q \
11+
# RUN: -o %t/materialized-ci.exe
12+
# RUN: llvm-bolt %t/materialized-ci.exe \
13+
# RUN: -o %t/materialized-ci.bolt --lite=0 \
14+
# RUN: --keep-nops --eliminate-unreachable=false \
15+
# RUN: | FileCheck %s --check-prefix=CHECK-LOGS
16+
17+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
18+
# RUN: -DCIOUTSIDEFUNC \
19+
# RUN: %t/materialized-ci.s -o %t/materialized-ci.o
20+
# RUN: %clang %t/materialized-ci.o -Wl,-q \
21+
# RUN: -o %t/materialized-ci.exe
22+
# RUN: llvm-bolt %t/materialized-ci.exe \
23+
# RUN: -o %t/materialized-ci.bolt --lite=0 \
24+
# RUN: --keep-nops --eliminate-unreachable=false \
25+
# RUN: | FileCheck %s --check-prefix=CHECK-LOGS
26+
27+
# CHECK-LOGS: simplified 2 out of 2 loads
28+
29+
#--- materialized-ci.s
2030
.text
2131
.align 4
2232
.local foo
2333
.type foo, %function
2434
foo:
25-
# FDATA: 1 main 0 1 foo 0 0 10
2635
stp x29, x30, [sp, #-32]!
2736
stp x19, x20, [sp, #16]
2837
mov x29, sp
@@ -31,44 +40,48 @@ foo:
3140
mov w22, #0 // result = 0
3241

3342
ldr w23, .Llimit
34-
ldr w24, .LStep
43+
ldr x24, .LStep
44+
#ifdef CIBIGFUNC
3545
b .LStub
3646

3747
.LConstants:
3848
.Llimit: .word 100
39-
.LStep: .word 3
49+
.LStep: .xword 3
4050

4151
.LStub:
4252
.rep 0x100000
4353
nop
4454
.endr
4555
b .Lmain_loop
56+
#endif
4657

4758
.Lmain_loop:
4859
madd w22, w19, w24, w22 // result += counter * increment
49-
5060
add w19, w19, #1
5161
cmp w19, w23
5262
b.lt .Lmain_loop
53-
5463
mov w0, w22
55-
5664
b .Lreturn_point
57-
5865
.Lreturn_point:
5966
ldp x19, x20, [sp, #16]
6067
ldp x29, x30, [sp], #32
6168
ret
6269
.size foo, .-foo
70+
#ifdef CIOUTSIDEFUNC
71+
.LConstants:
72+
.Llimit: .word 100
73+
.LStep: .xword 3
74+
#endif
6375

6476

6577
.global main
6678
.type main, %function
6779
main:
6880
mov x0, #0
6981
bl foo
70-
mov x0, 0
71-
mov w8, #93
72-
svc #0
82+
mov x0, 0
83+
mov w8, #93
84+
svc #0
85+
86+
.size main, .-main
7387

74-
.size main, .-main

0 commit comments

Comments
 (0)