Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,13 @@ class MCPlusBuilder {
return {};
}

virtual InstructionListType materializeConstant(const MCInst &Inst,
StringRef ConstantData,
uint64_t Offset) const {
llvm_unreachable("not implemented");
return {};
}

/// Creates a new unconditional branch instruction in Inst and set its operand
/// to TBB.
virtual void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
Expand Down
53 changes: 41 additions & 12 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,8 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
uint64_t NumDynamicLocalLoadsFound = 0;

for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
for (MCInst &Inst : *BB) {
for (auto It = BB->begin(); It != BB->end(); ++It) {
const MCInst &Inst = *It;
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = BC.MII->get(Opcode);

Expand All @@ -1200,7 +1201,7 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {

if (MIB->hasPCRelOperand(Inst)) {
// Try to find the symbol that corresponds to the PC-relative operand.
MCOperand *DispOpI = MIB->getMemOperandDisp(Inst);
MCOperand *DispOpI = MIB->getMemOperandDisp(const_cast<MCInst &>(Inst));
assert(DispOpI != Inst.end() && "expected PC-relative displacement");
assert(DispOpI->isExpr() &&
"found PC-relative with non-symbolic displacement");
Expand All @@ -1226,28 +1227,53 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
}

// Get the contents of the section containing the target address of the
// memory operand. We are only interested in read-only sections.
// memory operand. We are only interested in read-only sections for X86,
// for aarch64 the sections can be read-only or executable.
ErrorOr<BinarySection &> DataSection =
BC.getSectionForAddress(TargetAddress);
if (!DataSection || DataSection->isWritable())
if (!DataSection)
continue;

if (BC.isX86() && DataSection->isWritable())
continue;

if (DataSection->isText()) {
// If data is not part of a function, check if it is part of a global CI
// Do not proceed if there aren't data markers for CIs
BinaryFunction *BFTgt =
BC.getBinaryFunctionContainingAddress(TargetAddress,
/*CheckPastEnd*/ false,
/*UseMaxSize*/ true);
const bool IsInsideFunc =
BFTgt && BFTgt->isInConstantIsland(TargetAddress);

auto CIEndIter = BC.AddressToConstantIslandMap.end();
auto CIIter = BC.AddressToConstantIslandMap.find(TargetAddress);
if (!IsInsideFunc && CIIter == CIEndIter)
continue;
}

if (BC.getRelocationAt(TargetAddress) ||
BC.getDynamicRelocationAt(TargetAddress))
continue;

uint32_t Offset = TargetAddress - DataSection->getAddress();
StringRef ConstantData = DataSection->getContents();

++NumLocalLoadsFound;
if (BB->hasProfile())
NumDynamicLocalLoadsFound += BB->getExecutionCount();

if (MIB->replaceMemOperandWithImm(Inst, ConstantData, Offset)) {
++NumLocalLoadsSimplified;
if (BB->hasProfile())
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
}
uint32_t Offset = TargetAddress - DataSection->getAddress();
StringRef ConstantData = DataSection->getContents();
const InstructionListType Instrs =
MIB->materializeConstant(Inst, ConstantData, Offset);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For x86, could we keep the original code which directly update the Instr in place? The new code calling materializeConstant() will call replaceMemOperandWithImm(), create a new Instr and copy over the new instruction operand by operand, find the original instruction in basic block, and then replace it with the new instruction.

Copy link
Contributor Author

@yavtuk yavtuk Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to use extra wrapper to unify for all platform, replaceMemOperandWithImm is used in another optimisations passes related to X86

if (Instrs.empty())
continue;

auto IIter = BB->findInstruction(&Inst);
It = std::next(BB->replaceInstruction(IIter, Instrs), Instrs.size());

++NumLocalLoadsSimplified;
if (BB->hasProfile())
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
}
}

Expand All @@ -1260,6 +1286,9 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
}

Error SimplifyRODataLoads::runOnFunctions(BinaryContext &BC) {
if (BC.isRISCV())
return Error::success();

for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &Function = It.second;
if (shouldOptimize(Function) && simplifyRODataLoads(Function))
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ static cl::opt<bool> SimplifyRODataLoads(
"simplify-rodata-loads",
cl::desc("simplify loads from read-only sections by replacing the memory "
"operand with the constant found in the corresponding section"),
cl::cat(BoltOptCategory));
cl::init(true), cl::cat(BoltOptCategory));

static cl::list<std::string>
SpecializeMemcpy1("memcpy1-spec",
Expand Down
47 changes: 47 additions & 0 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2770,6 +2770,53 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Insts;
}

InstructionListType materializeConstant(const MCInst &Inst,
StringRef ConstantData,
uint64_t Offset) const override {
struct InstInfo {
// Size in bytes that Inst loads from memory.
uint8_t DataSize;
// Number of instructions needed to materialize the constant.
uint8_t numInstrs;
// Opcode to use for materializing the constant.
unsigned Opcode;
};

InstInfo I;
InstructionListType Insts(0);
switch (Inst.getOpcode()) {
case AArch64::LDRWl:
I = {4, 2, AArch64::MOVKWi};
break;
case AArch64::LDRXl:
I = {8, 4, AArch64::MOVKXi};
break;
default:
llvm_unreachable("unexpected ldr instruction");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could there be any other PC relative LDR instructions hit the default case, like byte or half-word load? Just in case, maybe replace llvm_unreachable() with a comment saying unsupported and then return Insts.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't find instructions related to byte or half wold, now only 2 instructions are handled.
we can analyse the apps to figure out which and how much are used, I faced only 4 and 8 bytes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we will be able to add extra logic as necessary

break;
}

if (ConstantData.size() - Offset < I.DataSize)
return Insts;

const uint64_t ImmVal =
DataExtractor(ConstantData, true, 8).getUnsigned(&Offset, I.DataSize);

Insts.resize(I.numInstrs);
unsigned shift = (Insts.size() - 1) * 16;
MCPhysReg Reg = Inst.getOperand(0).getReg();
for (unsigned i = 0; i < Insts.size(); i++, shift -= 16) {
Insts[i].setOpcode(I.Opcode);
Insts[i].clear();
Insts[i].addOperand(MCOperand::createReg(Reg));
Insts[i].addOperand(MCOperand::createReg(Reg));
Insts[i].addOperand(MCOperand::createImm((ImmVal >> shift) & 0xFFFF));
Insts[i].addOperand(MCOperand::createImm(shift));
}

return Insts;
}

std::optional<Relocation>
createRelocation(const MCFixup &Fixup,
const MCAsmBackend &MAB) const override {
Expand Down
18 changes: 18 additions & 0 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1477,6 +1477,24 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return true;
}

InstructionListType materializeConstant(const MCInst &Inst,
StringRef ConstantData,
uint64_t Offset) const override {
InstructionListType Instrs;
MCInst InstCopy = Inst;

if (!replaceMemOperandWithImm(InstCopy, ConstantData, Offset))
return InstructionListType{};

Instrs.emplace_back();
Instrs.back().setOpcode(InstCopy.getOpcode());
Instrs.back().clear();
for (unsigned i = 0; i < InstCopy.getNumOperands(); ++i)
Instrs.back().addOperand(InstCopy.getOperand(i));

return Instrs;
}

/// TODO: this implementation currently works for the most common opcodes that
/// load from memory. It can be extended to work with memory store opcodes as
/// well as more memory load opcodes.
Expand Down
87 changes: 87 additions & 0 deletions bolt/test/AArch64/materialize-constant.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// this test checks a load literal instructions changed to movk

# REQUIRES: system-linux

# RUN: rm -rf %t && split-file %s %t

# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
# RUN: --defsym CIBIGFUNC=1 \
# RUN: %t/materialized-ci.s -o %t/materialized-ci.o
# RUN: %clang %t/materialized-ci.o -Wl,-q \
# RUN: -o %t/materialized-ci.exe
# RUN: llvm-bolt %t/materialized-ci.exe \
# RUN: -o %t/materialized-ci.bolt --lite=0 \
# RUN: --keep-nops --eliminate-unreachable=false \
# RUN: | FileCheck %s --check-prefix=CHECK-LOGS

# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
# RUN: --defsym CIOUTSIDEFUNC=1 \
# RUN: %t/materialized-ci.s -o %t/materialized-ci.o
# RUN: %clang %t/materialized-ci.o -Wl,-q \
# RUN: -o %t/materialized-ci.exe
# RUN: llvm-bolt %t/materialized-ci.exe \
# RUN: -o %t/materialized-ci.bolt --lite=0 \
# RUN: --keep-nops --eliminate-unreachable=false \
# RUN: | FileCheck %s --check-prefix=CHECK-LOGS

# CHECK-LOGS: simplified 2 out of 2 loads

#--- materialized-ci.s
.text
.align 4
.local foo
.type foo, %function
foo:
stp x29, x30, [sp, #-32]!
stp x19, x20, [sp, #16]
mov x29, sp

mov w19, #0 // counter = 0
mov w22, #0 // result = 0

ldr w23, .Llimit
ldr x24, .LStep

.ifdef CIBIGFUNC
b .LStub
.LConstants:
.Llimit: .word 100
.LStep: .xword 3
.LStub:
.rep 0x100000
nop
.endr
b .Lmain_loop
.endif

.Lmain_loop:
madd w22, w19, w24, w22 // result += counter * increment
add w19, w19, #1
cmp w19, w23
b.lt .Lmain_loop
mov w0, w22
b .Lreturn_point
.Lreturn_point:
ldp x19, x20, [sp, #16]
ldp x29, x30, [sp], #32
ret
.size foo, .-foo

.ifdef CIOUTSIDEFUNC
.LConstants:
.Llimit: .word 100
.LStep: .xword 3
.endif


.global main
.type main, %function
main:
mov x0, #0
bl foo
mov x0, 0
mov w8, #93
svc #0

.size main, .-main

Loading