+//===- SectionOrderer.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lld/Common/SectionOrderer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BalancedPartitioning.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/xxhash.h"
+
+#define DEBUG_TYPE "bp-section-orderer"
+using namespace llvm;
+using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
+
+namespace lld {
+
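+/// Build the compression utility nodes for the given sections: sections whose
+/// content hashes overlap share utility nodes, so BalancedPartitioning will
+/// place them near each other and improve compressed size. If
+/// `duplicateSectionIdxs` is non-null, nearly identical sections are collapsed
+/// and only the representative section participates in partitioning.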
+static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
+    ArrayRef<const BPSectionBase *> sections,
+    const DenseMap<const BPSectionBase *, uint64_t> &sectionToIdx,
+    ArrayRef<unsigned> sectionIdxs,
+    DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
+    BPFunctionNode::UtilityNodeT &maxUN) {
+  TimeTraceScope timeScope("Build nodes for compression");
+
+  SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> sectionHashes;
+  sectionHashes.reserve(sectionIdxs.size());
+  SmallVector<uint64_t> hashes;
+
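+  // Collect one hash vector per section; each hash summarizes a feature of
+  // the section's contents, as defined by getSectionHash().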
+  for (unsigned sectionIdx : sectionIdxs) {
+    const auto *isec = sections[sectionIdx];
+    isec->getSectionHash(hashes, sectionToIdx);
+    sectionHashes.emplace_back(sectionIdx, std::move(hashes));
+    hashes.clear();
+  }
+
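+  // Count how often each hash occurs across all sections; these frequencies
+  // drive the near-duplicate merging and the utility node filtering below.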
+  DenseMap<uint64_t, unsigned> hashFrequency;
+  for (auto &[sectionIdx, hashes] : sectionHashes)
+    for (auto hash : hashes)
+      ++hashFrequency[hash];
+
+  if (duplicateSectionIdxs) {
+    // Merge sections that are nearly identical.
+    SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
+    DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
+    for (auto &[sectionIdx, hashes] : sectionHashes) {
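+      // XOR together only the frequently occurring hashes; ignoring rare
+      // hashes lets sections that differ only in incidental content map to
+      // the same wholeHash and be treated as duplicates.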
+      uint64_t wholeHash = 0;
+      for (auto hash : hashes)
+        if (hashFrequency[hash] > 5)
+          wholeHash ^= hash;
+      auto [it, wasInserted] =
+          wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
+      if (wasInserted) {
+        newSectionHashes.emplace_back(sectionIdx, hashes);
+      } else {
+        (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
+      }
+    }
+    sectionHashes = newSectionHashes;
+
+    // Recompute hash frequencies
+    hashFrequency.clear();
+    for (auto &[sectionIdx, hashes] : sectionHashes)
+      for (auto hash : hashes)
+        ++hashFrequency[hash];
+  }
+
+  // Filter rare and common hashes and assign each a unique utility node that
+  // doesn't conflict with the trace utility nodes.
+  DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
+  for (auto &[hash, frequency] : hashFrequency) {
+    if (frequency <= 1 || frequency * 2 > sectionHashes.size())
+      continue;
+    hashToUN[hash] = ++maxUN;
+  }
+
+  SmallVector<std::pair<unsigned, UtilityNodes>> sectionUns;
+  for (auto &[sectionIdx, hashes] : sectionHashes) {
+    UtilityNodes uns;
+    for (auto &hash : hashes) {
+      auto it = hashToUN.find(hash);
+      if (it != hashToUN.end())
+        uns.push_back(it->second);
+    }
+    sectionUns.emplace_back(sectionIdx, uns);
+  }
+  return sectionUns;
+}
+
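+/// Reorder sections using balanced partitioning and return a map from section
+/// to priority, handing out priorities downward from
+/// `highestAvailablePriority`. Startup functions named by the temporal
+/// profile at `profilePath` come first, followed by functions and then data
+/// grouped for compression. A caller might use it roughly as in this sketch
+/// (the local names here are illustrative, not part of the API):
+///
+/// \code
+///   size_t highestAvailablePriority = std::numeric_limits<size_t>::max();
+///   SmallVector<BPSectionBase *> inputSections = collectInputSections();
+///   auto priorities = SectionOrderer::reorderSectionsByBalancedPartitioning(
+///       highestAvailablePriority, profilePath,
+///       /*forFunctionCompression=*/true, /*forDataCompression=*/true,
+///       /*compressionSortStartupFunctions=*/true, /*verbose=*/false,
+///       inputSections);
+/// \endcode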
+llvm::DenseMap<const BPSectionBase *, size_t>
+SectionOrderer::reorderSectionsByBalancedPartitioning(
+    size_t &highestAvailablePriority, llvm::StringRef profilePath,
+    bool forFunctionCompression, bool forDataCompression,
+    bool compressionSortStartupFunctions, bool verbose,
+    SmallVector<BPSectionBase *> inputSections) {
+  TimeTraceScope timeScope("Balanced Partitioning");
+  SmallVector<const BPSectionBase *> sections;
+  DenseMap<const BPSectionBase *, uint64_t> sectionToIdx;
+  StringMap<DenseSet<unsigned>> symbolToSectionIdxs;
+
+  // Process input sections
+  for (const auto *isec : inputSections) {
+    if (!isec->hasValidData())
+      continue;
+
+    unsigned sectionIdx = sections.size();
+    sectionToIdx.try_emplace(isec, sectionIdx);
+    sections.push_back(isec);
+
+    for (auto *sym : isec->getSymbols()) {
+      if (auto *d = sym->asDefinedSymbol())
+        symbolToSectionIdxs[d->getName()].insert(sectionIdx);
+    }
+  }
+  StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
+  for (auto &entry : symbolToSectionIdxs) {
+    StringRef name = entry.getKey();
+    auto &sectionIdxs = entry.getValue();
+    name = BPSectionBase::getRootSymbol(name);
+    rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
+                                         sectionIdxs.end());
+    // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
+    // Mangler::getNameWithPrefix() for details.
+    if (name.consume_front("_") || name.consume_front("l_"))
+      rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
+                                           sectionIdxs.end());
+  }
+
+  BPFunctionNode::UtilityNodeT maxUN = 0;
+  DenseMap<unsigned, UtilityNodes> startupSectionIdxUNs;
+  // Used to define the initial order for startup functions.
+  DenseMap<unsigned, size_t> sectionIdxToTimestamp;
+  std::unique_ptr<InstrProfReader> reader;
+  if (!profilePath.empty()) {
+    auto fs = vfs::getRealFileSystem();
+    auto readerOrErr = InstrProfReader::create(profilePath, *fs);
+    lld::checkError(readerOrErr.takeError());
+
+    reader = std::move(readerOrErr.get());
+    // Read all entries; this must happen before the temporal profile traces
+    // below can be consumed.
+    for (auto &entry : *reader)
+      (void)entry;
+    auto &traces = reader->getTemporalProfTraces();
+
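+    // For each trace, assign utility nodes to the sections it references. A
+    // new utility node is started whenever the timestamp or the cumulative
+    // section size doubles, so sections touched early in startup share more
+    // utility nodes and are pulled close together.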
+    DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
+    for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
+      uint64_t currentSize = 0, cutoffSize = 1;
+      size_t cutoffTimestamp = 1;
+      auto &trace = traces[traceIdx].FunctionNameRefs;
+      for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
+        auto [Filename, ParsedFuncName] = getParsedIRPGOName(
+            reader->getSymtab().getFuncOrVarName(trace[timestamp]));
+        ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName);
+
+        auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName);
+        if (sectionIdxsIt == rootSymbolToSectionIdxs.end())
+          continue;
+        auto &sectionIdxs = sectionIdxsIt->getValue();
+        // If the same symbol is found in multiple sections, they might be
+        // identical, so we arbitrarily use the size from the first section.
+        currentSize += sections[*sectionIdxs.begin()]->getSize();
+
+        // Since BalancedPartitioning is sensitive to the initial order, we
+        // need to explicitly define it to be ordered by earliest timestamp.
+        for (unsigned sectionIdx : sectionIdxs) {
+          auto [it, wasInserted] =
+              sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp);
+          if (!wasInserted)
+            it->getSecond() = std::min<size_t>(it->getSecond(), timestamp);
+        }
+
+        if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) {
+          ++maxUN;
+          cutoffSize = 2 * currentSize;
+          cutoffTimestamp = 2 * cutoffTimestamp;
+        }
+        for (unsigned sectionIdx : sectionIdxs)
+          sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN);
+      }
+      for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN)
+        for (auto un = firstUN; un <= maxUN; ++un)
+          startupSectionIdxUNs[sectionIdx].push_back(un);
+      ++maxUN;
+      sectionIdxToFirstUN.clear();
+    }
+  }
+
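+  // Split the remaining sections by kind; sections already covered by the
+  // startup ordering above are skipped.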
+  SmallVector<unsigned> sectionIdxsForFunctionCompression,
+      sectionIdxsForDataCompression;
+  for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
+    if (startupSectionIdxUNs.count(sectionIdx))
+      continue;
+    const auto *isec = sections[sectionIdx];
+    if (isec->isCodeSection()) {
+      if (forFunctionCompression)
+        sectionIdxsForFunctionCompression.push_back(sectionIdx);
+    } else {
+      if (forDataCompression)
+        sectionIdxsForDataCompression.push_back(sectionIdx);
+    }
+  }
+
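+  // Optionally mix compression utility nodes into the startup sections too,
+  // so that similar startup functions are also laid out near each other.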
+  if (compressionSortStartupFunctions) {
+    SmallVector<unsigned> startupIdxs;
+    for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+      startupIdxs.push_back(sectionIdx);
+    auto unsForStartupFunctionCompression =
+        getUnsForCompression(sections, sectionToIdx, startupIdxs,
+                             /*duplicateSectionIdxs=*/nullptr, maxUN);
+    for (auto &[sectionIdx, compressionUns] :
+         unsForStartupFunctionCompression) {
+      auto &uns = startupSectionIdxUNs[sectionIdx];
+      uns.append(compressionUns);
+      llvm::sort(uns);
+      uns.erase(std::unique(uns.begin(), uns.end()), uns.end());
+    }
+  }
+
+  // Map a section index (ordered directly) to a list of duplicate section
+  // indices (not ordered directly).
+  DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
+  auto unsForFunctionCompression = getUnsForCompression(
+      sections, sectionToIdx, sectionIdxsForFunctionCompression,
+      &duplicateSectionIdxs, maxUN);
+  auto unsForDataCompression = getUnsForCompression(
+      sections, sectionToIdx, sectionIdxsForDataCompression,
+      &duplicateSectionIdxs, maxUN);
+
+  std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
+      nodesForDataCompression;
+  for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+    nodesForStartup.emplace_back(sectionIdx, uns);
+  for (auto &[sectionIdx, uns] : unsForFunctionCompression)
+    nodesForFunctionCompression.emplace_back(sectionIdx, uns);
+  for (auto &[sectionIdx, uns] : unsForDataCompression)
+    nodesForDataCompression.emplace_back(sectionIdx, uns);
+
+  // Use the first timestamp to define the initial order for startup nodes.
+  llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
+    return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
+           std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
+  });
+  // Sort compression nodes by their Id (the section index) because the input
+  // linker order is already a reasonable baseline.
+  llvm::sort(nodesForFunctionCompression,
+             [](auto &L, auto &R) { return L.Id < R.Id; });
+  llvm::sort(nodesForDataCompression,
+             [](auto &L, auto &R) { return L.Id < R.Id; });
+
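+  // Run balanced partitioning on each node set independently; each run
+  // reorders its nodes in place so that nodes sharing many utility nodes end
+  // up adjacent.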
+  {
+    TimeTraceScope timeScope("Balanced Partitioning");
+    BalancedPartitioningConfig config;
+    BalancedPartitioning bp(config);
+    bp.run(nodesForStartup);
+    bp.run(nodesForFunctionCompression);
+    bp.run(nodesForDataCompression);
+  }
+
+  unsigned numStartupSections = 0;
+  unsigned numCodeCompressionSections = 0;
+  unsigned numDuplicateCodeSections = 0;
+  unsigned numDataCompressionSections = 0;
+  unsigned numDuplicateDataSections = 0;
+  SetVector<const BPSectionBase *> orderedSections;
+  // Order startup functions,
+  for (auto &node : nodesForStartup) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numStartupSections;
+  }
+  // then functions for compression,
+  for (auto &node : nodesForFunctionCompression) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numCodeCompressionSections;
+
+    auto It = duplicateSectionIdxs.find(node.Id);
+    if (It == duplicateSectionIdxs.end())
+      continue;
+    for (auto dupSecIdx : It->getSecond()) {
+      const auto *dupIsec = sections[dupSecIdx];
+      if (orderedSections.insert(dupIsec))
+        ++numDuplicateCodeSections;
+    }
+  }
+  // then data for compression.
+  for (auto &node : nodesForDataCompression) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numDataCompressionSections;
+    auto It = duplicateSectionIdxs.find(node.Id);
+    if (It == duplicateSectionIdxs.end())
+      continue;
+    for (auto dupSecIdx : It->getSecond()) {
+      const auto *dupIsec = sections[dupSecIdx];
+      if (orderedSections.insert(dupIsec))
+        ++numDuplicateDataSections;
+    }
+  }
+
+  if (verbose) {
+    unsigned numTotalOrderedSections =
+        numStartupSections + numCodeCompressionSections +
+        numDuplicateCodeSections + numDataCompressionSections +
+        numDuplicateDataSections;
+    dbgs()
+        << "Ordered " << numTotalOrderedSections
+        << " sections using balanced partitioning:\n Functions for startup: "
+        << numStartupSections
+        << "\n Functions for compression: " << numCodeCompressionSections
+        << "\n Duplicate functions: " << numDuplicateCodeSections
+        << "\n Data for compression: " << numDataCompressionSections
+        << "\n Duplicate data: " << numDuplicateDataSections << "\n";
+
+    if (!profilePath.empty()) {
+      // Evaluate this function order for startup
+      StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
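+      // Assume 16 KiB pages (1 << 14), as used on modern Apple platforms;
+      // this only affects the page fault estimate reported below.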
+      const uint64_t pageSize = (1 << 14);
+      uint64_t currentAddress = 0;
+      for (const auto *isec : orderedSections) {
+        for (auto *sym : isec->getSymbols()) {
+          if (auto *d = sym->asDefinedSymbol()) {
+            uint64_t startAddress = currentAddress + d->getValue();
+            uint64_t endAddress = startAddress + d->getSize();
+            uint64_t firstPage = startAddress / pageSize;
+            // I think the kernel might pull in a few pages when one is
+            // touched, so it might be more accurate to force lastPage to be
+            // aligned by 4?
+            uint64_t lastPage = endAddress / pageSize;
+            StringRef rootSymbol = d->getName();
+            rootSymbol = BPSectionBase::getRootSymbol(rootSymbol);
+            symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+            if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_"))
+              symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+          }
+        }
+        currentAddress += isec->getSize();
+      }
+
+      // Compute the area under the curve F, where F(t) is the total number
+      // of page faults after t steps of the trace; a smaller area means
+      // fewer distinct pages are touched early during startup.
+      unsigned area = 0;
+      for (auto &trace : reader->getTemporalProfTraces()) {
+        SmallSet<uint64_t, 0> touchedPages;
+        for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
+          auto traceId = trace.FunctionNameRefs[step];
+          auto [Filename, ParsedFuncName] =
+              getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
+          ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName);
+          auto it = symbolToPageNumbers.find(ParsedFuncName);
+          if (it != symbolToPageNumbers.end()) {
+            auto &[firstPage, lastPage] = it->getValue();
+            for (uint64_t i = firstPage; i <= lastPage; i++)
+              touchedPages.insert(i);
+          }
+          area += touchedPages.size();
+        }
+      }
+      dbgs() << "Total area under the page fault curve: " << (float)area
+             << "\n";
+    }
+  }
+
+  DenseMap<const BPSectionBase *, size_t> sectionPriorities;
+  for (const auto *isec : orderedSections)
+    sectionPriorities[isec] = --highestAvailablePriority;
+  return sectionPriorities;
+}
+
+} // namespace lld