Skip to content

[BOLT] Add BB index to BAT #86044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions bolt/docs/BAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,12 @@ current function.
### Address translation table
Delta encoding means that only the difference with the previous corresponding
entry is encoded. Input offsets implicitly start at zero.
| Entry | Encoding | Description |
| ------ | ------| ----------- |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
| `BBHash` | Optional, 8b | Basic block entries only: basic block hash in input binary |
| Entry | Encoding | Description | Branch/BB |
| ------ | ------| ----------- | ------ |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | Both |
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | Both |
| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |

`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
(branch or call instruction). If not set, it signifies a control flow target
Expand Down
7 changes: 6 additions & 1 deletion bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ class BoltAddressTranslation {
/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t OutputAddress) const;

/// Returns BB index by function output address (after BOLT) and basic block
/// input offset.
unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;

/// Reports whether \p Address has an entry in the translation tables, i.e.
/// whether it is a key of Maps.
bool isBATFunction(uint64_t Address) const {
  return Maps.find(Address) != Maps.end();
}

Expand Down Expand Up @@ -154,7 +158,8 @@ class BoltAddressTranslation {

std::map<uint64_t, MapTy> Maps;

using BBHashMap = std::unordered_map<uint32_t, size_t>;
/// Map basic block input offset to a basic block index and hash pair.
using BBHashMap = std::unordered_map<uint32_t, std::pair<unsigned, size_t>>;
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;

/// Links outlined cold blocks to their original function
Expand Down
39 changes: 29 additions & 10 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
getBBHash(HotFuncAddress, BBInputOffset)));
(void)HotFuncAddress;
LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
getBBIndex(HotFuncAddress, BBInputOffset)));
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
Expand Down Expand Up @@ -217,6 +219,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
}
size_t Index = 0;
uint64_t InOffset = 0;
size_t PrevBBIndex = 0;
// Output and Input addresses are delta-encoded
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
const uint64_t OutputAddress = KeyVal.first + Address;
Expand All @@ -226,11 +229,15 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
encodeSLEB128(KeyVal.second - InOffset, OS);
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
if ((InOffset & BRANCHENTRY) == 0) {
// Basic block hash
size_t BBHash = FuncHashPair.second[InOffset >> 1];
unsigned BBIndex;
size_t BBHash;
std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
OS.write(reinterpret_cast<char *>(&BBHash), 8);
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first,
InOffset >> 1, BBHash));
// Basic block index in the input binary
encodeULEB128(BBIndex - PrevBBIndex, OS);
PrevBBIndex = BBIndex;
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first,
InOffset >> 1, BBHash, BBIndex));
}
}
}
Expand Down Expand Up @@ -316,6 +323,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << "\n");
uint64_t InputOffset = 0;
size_t BBIndex = 0;
for (uint32_t J = 0; J < NumEntries; ++J) {
const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
const uint64_t OutputAddress = PrevAddress + OutputDelta;
Expand All @@ -330,19 +338,25 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
}
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
size_t BBHash = 0;
size_t BBIndexDelta = 0;
const bool IsBranchEntry = InputOffset & BRANCHENTRY;
if (!IsBranchEntry) {
BBHash = DE.getU64(&Offset, &Err);
BBIndexDelta = DE.getULEB128(&Offset, &Err);
BBIndex += BBIndexDelta;
// Map basic block hash to hot fragment by input offset
FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash);
FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
std::pair(BBIndex, BBHash));
}
LLVM_DEBUG({
dbgs() << formatv(
"{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
if (BBHash)
dbgs() << formatv(" {0:x}", BBHash);
if (!IsBranchEntry) {
dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex,
getULEB128Size(BBIndexDelta));
}
dbgs() << '\n';
});
}
Expand Down Expand Up @@ -494,14 +508,19 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
FuncHashes[BF.getAddress()].first = BF.computeHash();
BF.computeBlockHashes();
for (const BinaryBasicBlock &BB : BF)
FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
BB.getHash());
FuncHashes[BF.getAddress()].second.emplace(
BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
}
}

/// Looks up the basic block index recorded for the block at \p BBInputOffset
/// inside the function keyed by \p FuncOutputAddress. Both lookups use at(),
/// so the function and the block must already be present in FuncHashes.
unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
                                            uint32_t BBInputOffset) const {
  const auto &BlocksByInputOffset = FuncHashes.at(FuncOutputAddress).second;
  const auto &IndexAndHash = BlocksByInputOffset.at(BBInputOffset);
  return IndexAndHash.first;
}

/// Returns the basic block hash recorded for the block at \p BBInputOffset
/// inside the function keyed by \p FuncOutputAddress.
///
/// Each per-block entry is an (index, hash) pair, so the hash is the pair's
/// second member. Both lookups use at(), so the function and the block must
/// already be present in FuncHashes.
size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
                                         uint32_t BBInputOffset) const {
  return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
}

size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/bolt-address-translation-yaml.test
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s

WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 344
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 376

READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/bolt-address-translation.test
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
# CHECK: BOLT: 3 out of 7 functions were overwritten.
# CHECK: BOLT-INFO: Wrote 6 BAT maps
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
# CHECK: BOLT-INFO: BAT section size (bytes): 816
# CHECK: BOLT-INFO: BAT section size (bytes): 920
#
# usqrt mappings (hot part). We match against any key (left side containing
# the bolted binary offsets) because BOLT may change where it puts instructions
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5863,8 +5863,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
} else if (Triple.getArch() == llvm::Triple::x86_64) {
Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
CM);
} else if (Triple.isNVPTX() || Triple.isAMDGPU() || Triple.isSPIRV()) {
// NVPTX/AMDGPU/SPIRV does not care about the code model and will accept
} else if (Triple.isNVPTX() || Triple.isAMDGPU()) {
// NVPTX/AMDGPU does not care about the code model and will accept
// whatever works for the host.
Ok = true;
} else if (Triple.isSPARC64()) {
Expand Down
1 change: 0 additions & 1 deletion clang/test/Driver/unsupported-option-gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@
// DEFINE: %{check} = %clang -### --target=x86_64-linux-gnu -c -mcmodel=medium

// RUN: %{check} -x cuda %s --cuda-path=%S/Inputs/CUDA/usr/local/cuda --offload-arch=sm_60 --no-cuda-version-check -fbasic-block-sections=all
// RUN: %{check} -x hip %s --offload=spirv64 -nogpulib -nogpuinc
// RUN: %{check} -x hip %s --rocm-path=%S/Inputs/rocm -nogpulib -nogpuinc
42 changes: 38 additions & 4 deletions lld/MachO/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ static void replaceCommonSymbols() {
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
isec->parent = osec;
addInputSection(isec);
inputSections.push_back(isec);

// FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
// and pass them on here.
Expand Down Expand Up @@ -1220,18 +1220,53 @@ static void createFiles(const InputArgList &args) {

// Walks every section of every input file and routes each subsection's input
// section to its container: concat sections to 'inputSections' (or to
// in.initOffsets), C-string and word-literal sections to their synthetic
// sections. Files with non-empty objCImageInfo are also registered with
// in.objCImageInfo.
static void gatherInputSections() {
TimeTraceScope timeScope("Gathering input sections");
// Running counter: stored in each concat section's outSecOff and, the first
// time a synthetic literal section receives input, in that section's
// inputOrder.
int inputOrder = 0;
for (const InputFile *file : inputFiles) {
for (const Section *section : file->sections) {
// Compact unwind entries require special handling elsewhere. (In
// contrast, EH frames are handled like regular ConcatInputSections.)
if (section->name == section_names::compactUnwind)
continue;
// NOTE(review): this loop and the one right after it both visit every
// subsection of the section. They look like the old and new sides of the same
// diff hunk; presumably only one of them is meant to remain -- confirm.
for (const Subsection &subsection : section->subsections)
addInputSection(subsection.isec);
// The output section is looked up at most once per Section and then reused
// for the remaining concat subsections of that Section.
ConcatOutputSection *osec = nullptr;
for (const Subsection &subsection : section->subsections) {
if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
// Coalesced weak sections are not added to any container.
if (isec->isCoalescedWeak())
continue;
// With emitInitOffsets set, S_MOD_INIT_FUNC_POINTERS sections go to the
// init-offsets section instead of 'inputSections'.
if (config->emitInitOffsets &&
sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
in.initOffsets->addInput(isec);
continue;
}
isec->outSecOff = inputOrder++;
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
isec->parent = osec;
inputSections.push_back(isec);
} else if (auto *isec =
dyn_cast<CStringInputSection>(subsection.isec)) {
// C strings are split by section name between the objc method-name
// section and the general C-string section.
if (isec->getName() == section_names::objcMethname) {
if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
in.objcMethnameSection->inputOrder = inputOrder++;
in.objcMethnameSection->addInput(isec);
} else {
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
in.cStringSection->inputOrder = inputOrder++;
in.cStringSection->addInput(isec);
}
} else if (auto *isec =
dyn_cast<WordLiteralInputSection>(subsection.isec)) {
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
in.wordLiteralSection->inputOrder = inputOrder++;
in.wordLiteralSection->addInput(isec);
} else {
llvm_unreachable("unexpected input section kind");
}
}
}
if (!file->objCImageInfo.empty())
in.objCImageInfo->addFile(file);
}
// The counter must never exceed the sentinel used for "no order assigned yet".
assert(inputOrder <= UnspecifiedInputOrder);
}

static void foldIdenticalLiterals() {
Expand Down Expand Up @@ -1387,7 +1422,6 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
concatOutputSections.clear();
inputFiles.clear();
inputSections.clear();
inputSectionsOrder = 0;
loadedArchives.clear();
loadedObjectFrameworks.clear();
missingAutolinkWarnings.clear();
Expand Down
38 changes: 0 additions & 38 deletions lld/MachO/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,44 +37,6 @@ static_assert(sizeof(void *) != 8 ||
"instances of it");

std::vector<ConcatInputSection *> macho::inputSections;
int macho::inputSectionsOrder = 0;

// Registers a new InputSection with the container that handles its kind.
// Concat sections end up in the 'inputSections' vector unless they are
// coalesced weak (not added anywhere) or are init-function-pointer sections
// while config->emitInitOffsets is set (sent to in.initOffsets). C-string and
// word-literal sections are handed to their synthetic sections.
void lld::macho::addInputSection(InputSection *inputSection) {
  if (auto *concatSec = dyn_cast<ConcatInputSection>(inputSection)) {
    if (concatSec->isCoalescedWeak())
      return;
    if (config->emitInitOffsets &&
        sectionType(concatSec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
      in.initOffsets->addInput(concatSec);
      return;
    }
    // Record the arrival order, attach the output section, then publish.
    concatSec->outSecOff = inputSectionsOrder++;
    concatSec->parent = ConcatOutputSection::getOrCreateForInput(concatSec);
    inputSections.push_back(concatSec);
  } else if (auto *cstringSec = dyn_cast<CStringInputSection>(inputSection)) {
    // A synthetic section takes its input order from the first section
    // routed to it.
    if (cstringSec->getName() == section_names::objcMethname) {
      if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
        in.objcMethnameSection->inputOrder = inputSectionsOrder++;
      in.objcMethnameSection->addInput(cstringSec);
    } else {
      if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
        in.cStringSection->inputOrder = inputSectionsOrder++;
      in.cStringSection->addInput(cstringSec);
    }
  } else if (auto *literalSec =
                 dyn_cast<WordLiteralInputSection>(inputSection)) {
    if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
      in.wordLiteralSection->inputOrder = inputSectionsOrder++;
    in.wordLiteralSection->addInput(literalSec);
  } else {
    llvm_unreachable("unexpected input section kind");
  }

  // The counter must stay at or below the "unspecified" sentinel value.
  assert(inputSectionsOrder <= UnspecifiedInputOrder);
}

uint64_t InputSection::getFileSize() const {
return isZeroFill(getFlags()) ? 0 : getSize();
Expand Down
3 changes: 0 additions & 3 deletions lld/MachO/InputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,6 @@ bool isEhFrameSection(const InputSection *);
bool isGccExceptTabSection(const InputSection *);

extern std::vector<ConcatInputSection *> inputSections;
// This is used as a counter for specifying input order for input sections
extern int inputSectionsOrder;

namespace section_names {

Expand Down Expand Up @@ -371,7 +369,6 @@ constexpr const char addrSig[] = "__llvm_addrsig";

} // namespace section_names

void addInputSection(InputSection *inputSection);
} // namespace macho

std::string toString(const macho::InputSection *);
Expand Down
16 changes: 7 additions & 9 deletions lld/MachO/ObjC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,7 @@ void ObjcCategoryMerger::emitAndLinkProtocolList(
infoCategoryWriter.catPtrListInfo.align);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
listSec->live = true;
addInputSection(listSec);
allInputSections.push_back(listSec);

listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;

Expand Down Expand Up @@ -848,7 +848,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
infoCategoryWriter.catPtrListInfo.align);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
listSec->live = true;
addInputSection(listSec);
allInputSections.push_back(listSec);

listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;

Expand Down Expand Up @@ -889,7 +889,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCateogryName,
bodyData, infoCategoryWriter.catListInfo.align);
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
newCatList->live = true;
addInputSection(newCatList);
allInputSections.push_back(newCatList);

newCatList->parent = infoCategoryWriter.catListInfo.outputSection;

Expand Down Expand Up @@ -927,7 +927,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
bodyData, infoCategoryWriter.catBodyInfo.align);
newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
newBodySec->live = true;
addInputSection(newBodySec);
allInputSections.push_back(newBodySec);

std::string symName =
objc::symbol_names::category + baseClassName + "_$_(" + name + ")";
Expand Down Expand Up @@ -1132,7 +1132,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
infoCategoryWriter.catListInfo.align);
listSec->parent = infoCategoryWriter.catListInfo.outputSection;
listSec->live = true;
addInputSection(listSec);
allInputSections.push_back(listSec);

std::string slotSymName = "<__objc_catlist slot for category ";
slotSymName += nonErasedCatBody->getName();
Expand Down Expand Up @@ -1221,11 +1221,9 @@ void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }

/// Copies \p str, including its null terminator, into a buffer obtained from
/// newSectionData and returns a StringRef over the copied characters.
///
/// The buffer is sized strlen(str) + 1 so the terminator is stored, but the
/// returned StringRef has length strlen(str) and does not count it.
StringRef ObjcCategoryMerger::newStringData(const char *str) {
  uint32_t len = strlen(str);
  uint32_t bufSize = len + 1;
  auto &data = newSectionData(bufSize);
  char *strData = reinterpret_cast<char *>(data.data());
  // Copy the string chars and null-terminator in one go, so the result does
  // not depend on how newSectionData initialised the buffer.
  memcpy(strData, str, bufSize);
  return StringRef(strData, len);
}

Expand Down
4 changes: 2 additions & 2 deletions lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ void StubHelperSection::setUp() {

in.imageLoaderCache->parent =
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
addInputSection(in.imageLoaderCache);
inputSections.push_back(in.imageLoaderCache);
// Since this isn't in the symbol table or in any input file, the noDeadStrip
// argument doesn't matter.
dyldPrivate =
Expand Down Expand Up @@ -855,7 +855,7 @@ ConcatInputSection *ObjCSelRefsSection::makeSelRef(StringRef methname) {
/*addend=*/static_cast<int64_t>(methnameOffset),
/*referent=*/in.objcMethnameSection->isec});
objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref);
addInputSection(objcSelref);
inputSections.push_back(objcSelref);
objcSelref->isFinal = true;
methnameToSelref[CachedHashStringRef(methname)] = objcSelref;
return objcSelref;
Expand Down
Loading