From a54f1449c5b3ec65a7d036138616663d7b428cfa Mon Sep 17 00:00:00 2001 From: yjn <1076891326@qq.com> Date: Sat, 19 Jul 2025 23:38:47 +0800 Subject: [PATCH] [BOLT][RISCV]fix up GOT Relocation Handling --- bolt/include/bolt/Core/MCPlusBuilder.h | 14 +++ bolt/lib/Core/BinaryFunction.cpp | 102 ++++++++++++++++--- bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 18 ++++ bolt/test/RISCV/reloc-got.s | 16 ++- 4 files changed, 133 insertions(+), 17 deletions(-) diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index f902a8c43cd1d..108bf1528d886 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -839,6 +839,20 @@ class MCPlusBuilder { return StringRef(); } + /// Returns the base register used by the instruction. + virtual unsigned getBaseReg(const MCInst &Inst) const{ + llvm_unreachable("not implemented"); + return 0; + } + + /// Matches a pair of instructions that implement a GOT load: + /// an AUIPC (loading the high part of the address) + /// followed by a GOT-loading instruction (loading the low part of the address). + virtual bool matchGotAuipcPair(const MCInst &Inst) const{ + llvm_unreachable("not implemented"); + return false; + } + /// Interface and basic functionality of a MCInstMatcher. The idea is to make /// it easy to match one or more MCInsts against a tree-like pattern and /// extract the fragment operands. Example: diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index eec68ff5a5fce..a4dab59a18c85 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1457,7 +1457,7 @@ Error BinaryFunction::disassemble() { if (BC.isAArch64()) handleAArch64IndirectCall(Instruction, Offset); } - } else if (BC.isRISCV()) { + }else if (BC.isRISCV()) { // Check if there's a relocation associated with this instruction. for (auto Itr = Relocations.lower_bound(Offset), ItrE = Relocations.lower_bound(Offset + Size); @@ -1466,15 +1466,7 @@ Error BinaryFunction::disassemble() { MCSymbol *Symbol = Relocation.Symbol; if (Relocation::isInstructionReference(Relocation.Type)) { - uint64_t RefOffset = Relocation.Value - getAddress(); - LabelsMapType::iterator LI = InstructionLabels.find(RefOffset); - - if (LI == InstructionLabels.end()) { - Symbol = BC.Ctx->createNamedTempSymbol(); - InstructionLabels.emplace(RefOffset, Symbol); - } else { - Symbol = LI->second; - } + continue; } uint64_t Addend = Relocation.Addend; @@ -1484,8 +1476,7 @@ Error BinaryFunction::disassemble() { if (Relocation::isGOT(Relocation.Type)) { assert(Relocation::isPCRelative(Relocation.Type) && "GOT relocation must be PC-relative on RISC-V"); - Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0); - Addend = Relocation.Value + Relocation.Offset + getAddress(); + continue; } int64_t Value = Relocation.Value; const bool Result = BC.MIB->replaceImmWithSymbolRef( @@ -1493,7 +1484,7 @@ Error BinaryFunction::disassemble() { (void)Result; assert(Result && "cannot replace immediate with relocation"); } - } + } add_instruction: if (getDWARFLineTable()) { @@ -1514,6 +1505,90 @@ Error BinaryFunction::disassemble() { addInstruction(Offset, std::move(Instruction)); } + if(BC.isRISCV()){ + for (auto CurInstrIt = Instructions.begin(); CurInstrIt != Instructions.end(); ++CurInstrIt) { + uint64_t CurOffset = CurInstrIt->first; + if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(CurOffset)) continue; + + if(MIB->isBranch(CurInstrIt->second) || MIB->isCall(CurInstrIt->second)) continue; + if (MIB->isPseudo(CurInstrIt->second)) continue; + if (isZeroPaddingAt(CurInstrIt->first)) break; + + auto NextInstrIt = std::next(CurInstrIt); + uint64_t NextOffset = (NextInstrIt != Instructions.end()) ? NextInstrIt->first : getSize(); + for (auto Itr = Relocations.lower_bound(CurOffset), + ItrE = Relocations.lower_bound(NextOffset); + Itr != ItrE; ++Itr) { + Relocation &Relocation = Itr->second; + MCSymbol *Symbol = Relocation.Symbol; + + if (Relocation::isInstructionReference(Relocation.Type)) { + uint64_t RefOffset = Relocation.Value - getAddress(); + LabelsMapType::iterator LI = InstructionLabels.find(RefOffset); + + if (LI == InstructionLabels.end()) { + Symbol = BC.Ctx->createNamedTempSymbol(); + InstructionLabels.emplace(RefOffset, Symbol); + } else { + Symbol = LI->second; + } + } + + uint64_t Addend = Relocation.Addend; + + // For GOT relocations, create a reference against GOT entry ignoring + // the relocation symbol. + if (Relocation::isGOT(Relocation.Type)) { + assert(Relocation::isPCRelative(Relocation.Type) && + "GOT relocation must be PC-relative on RISC-V"); + // For RISC-V, we need to find the next instruction + // that matches the current instruction's base register. + auto NextInstrIt = std::next(CurInstrIt); + unsigned CurReg = BC.MIB->getBaseReg(CurInstrIt->second); + while (NextInstrIt != Instructions.end()) { + MCInst &NextInst = NextInstrIt->second; + unsigned NextReg = BC.MIB->getBaseReg(NextInst); + // some case there exit extra auipc instruction + // like auipc+auipc+ld instruction,so we need skip it + if(CurReg == NextReg && !BC.MIB->matchGotAuipcPair(NextInst)) { + break; + } + if(CurReg == NextReg && BC.MIB->matchGotAuipcPair(NextInst)){ + + int64_t CurImm = 0; + for (const MCOperand &Op : CurInstrIt->second) { + if (Op.isImm()) { + CurImm = Op.getImm(); + break; + } + } + int64_t NextImm = 0; + for (const MCOperand &Op : NextInstrIt->second) { + if (Op.isImm()) { + NextImm = Op.getImm(); + break; + } + } + Relocation.Value = (CurImm << 12) + NextImm; + break; + } + NextInstrIt = std::next(NextInstrIt); + } + Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0); + Addend = Relocation.Value + Relocation.Offset + getAddress(); + + }else if (!Relocation::isInstructionReference(Relocation.Type)) { + continue; + } + int64_t Value = Relocation.Value; + const bool Result = BC.MIB->replaceImmWithSymbolRef( + CurInstrIt->second, Symbol, Addend, Ctx.get(), Value, Relocation.Type); + (void)Result; + assert(Result && "cannot replace immediate with relocation"); + + } + } + } for (auto [Offset, Label] : InstructionLabels) { InstrMapType::iterator II = Instructions.find(Offset); @@ -1521,7 +1596,6 @@ Error BinaryFunction::disassemble() { BC.MIB->setInstLabel(II->second, Label); } - // Reset symbolizer for the disassembler. BC.SymbolicDisAsm->setSymbolizer(nullptr); diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp index 10b4913b6ab7f..9bebe59b400aa 100644 --- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp @@ -339,6 +339,24 @@ class RISCVMCPlusBuilder : public MCPlusBuilder { } } + unsigned getBaseReg(const MCInst &Inst) const override{ + switch (Inst.getOpcode()) { + default: + return 0; + case RISCV::AUIPC: + return Inst.getOperand(0).getReg(); + case RISCV::ADDI: + case RISCV::LD: + return Inst.getOperand(1).getReg(); + } + } + + bool matchGotAuipcPair(const MCInst &Inst) const override{ + return Inst.getOpcode() == RISCV::ADDI || + Inst.getOpcode() == RISCV::LD ; + } + + const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override { auto *RISCVExpr = dyn_cast(Expr); if (RISCVExpr && RISCVExpr->getSubExpr()) diff --git a/bolt/test/RISCV/reloc-got.s b/bolt/test/RISCV/reloc-got.s index e7e85fddfb1cb..a9049ccf21064 100644 --- a/bolt/test/RISCV/reloc-got.s +++ b/bolt/test/RISCV/reloc-got.s @@ -8,16 +8,26 @@ d: .dword 0 + .globl e + .p2align 3 +e: + .dword 0 + .text .globl _start .p2align 1 // CHECK: Binary Function "_start" after building cfg { _start: nop // Here to not make the _start and .Ltmp0 symbols coincide -// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0 -// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0) + // CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0 + // CHECK: auipc t1, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp1 + // CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0) + // CHECK-NEXT: ld t1, %pcrel_lo(.Ltmp1)(t1) 1: auipc t0, %got_pcrel_hi(d) +2: + auipc t1, %got_pcrel_hi(e) ld t0, %pcrel_lo(1b)(t0) + ld t1, %pcrel_lo(2b)(t1) ret - .size _start, .-_start + .size _start, .-_start \ No newline at end of file