diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md index 8a3b2fcb5eb97..96a9a187e0acf 100644 --- a/bolt/docs/BAT.md +++ b/bolt/docs/BAT.md @@ -21,9 +21,9 @@ contain the metadata for input functions. # Internals ## Section contents The section is organized as follows: -- Functions table +- Hot functions table - Address translation tables -- Fragment linkage table +- Cold functions table ## Construction and parsing BAT section is created from `BoltAddressTranslation` class which captures @@ -43,7 +43,7 @@ and [BoltAddressTranslation.cpp](/bolt/lib/Profile/BoltAddressTranslation.cpp). ### Layout The general layout is as follows: ``` -Functions table header +Hot functions table header |------------------| | Function entry | | |--------------| | @@ -51,13 +51,17 @@ Functions table header | |--------------| | ~~~~~~~~~~~~~~~~~~~~ -Fragment linkage header +Cold functions table header |------------------| -| ColdAddr HotAddr | +| Function entry | +| |--------------| | +| | OutOff InOff | | +| |--------------| | ~~~~~~~~~~~~~~~~~~~~ ``` ### Functions table +Hot and cold functions tables share the same encoding, except for the differences marked below. Header: | Entry | Encoding | Description | | ------ | ----- | ----------- | @@ -66,9 +70,11 @@ Header: The header is followed by Functions table with `NumFuncs` entries. Output binary addresses are delta encoded, meaning that only the difference with the previous output address is stored. Addresses implicitly start at zero. +Hot indices are delta encoded, implicitly starting at zero. | Entry | Encoding | Description | | ------ | ------| ----------- | | `Address` | Delta, ULEB128 | Function address in the output binary | +| `HotIndex` | Delta, ULEB128 | Cold functions only: index of the corresponding hot function in the hot functions table | | `NumEntries` | ULEB128 | Number of address translation entries for a function | Function header is followed by `NumEntries` pairs of offsets for current @@ -85,17 +91,3 @@ entry is encoded. 
Offsets implicitly start at zero. `BRANCHENTRY` bit denotes whether a given offset pair is a control flow source (branch or call instruction). If not set, it signifies a control flow target (basic block offset). - -### Fragment linkage table -Following Functions table, fragment linkage table is encoded to link split -cold fragments with main (hot) fragment. -Header: -| Entry | Encoding | Description | -| ------ | ------------ | ----------- | -| `NumColdEntries` | ULEB128 | Number of split functions in the functions table | - -`NumColdEntries` pairs of addresses follow: -| Entry | Encoding | Description | -| ------ | ------| ----------- | -| `ColdAddress` | ULEB128 | Cold fragment address in output binary | -| `HotAddress` | ULEB128 | Hot fragment address in output binary | diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h index 5439412cb5725..01d3be4ee59be 100644 --- a/bolt/include/bolt/Profile/BoltAddressTranslation.h +++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataExtractor.h" #include #include #include @@ -118,6 +119,16 @@ class BoltAddressTranslation { void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB, uint64_t FuncAddress); + /// Write the serialized address translation tables of either the hot or the cold functions in \p Maps, as selected by the Cold template parameter. + template + void writeMaps(std::map &Maps, raw_ostream &OS); + + /// Read the serialized address translation tables of either the hot or the cold functions, as selected by the Cold template parameter. + /// Parse failures are reported through \p Err. 
+ template + void parseMaps(std::vector &HotFuncs, DataExtractor &DE, + uint64_t &Offset, Error &Err); + std::map Maps; /// Links outlined cold bocks to their original function diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index 19b63d49e3c7d..697ff1e5dd0da 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -8,7 +8,6 @@ #include "bolt/Profile/BoltAddressTranslation.h" #include "bolt/Core/BinaryFunction.h" -#include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/LEB128.h" @@ -103,18 +102,42 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) { } } - const uint32_t NumFuncs = Maps.size(); + writeMaps(Maps, OS); + writeMaps(Maps, OS); + + outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n"; +} + +/// Emits the number of functions whose presence in ColdPartSource matches Cold, +/// followed by one record for each such function. +template +void BoltAddressTranslation::writeMaps(std::map &Maps, + raw_ostream &OS) { + const uint32_t NumFuncs = + llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) { + return Cold == ColdPartSource.count(Address); + }); encodeULEB128(NumFuncs, OS); - LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n"); + LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "") + << " functions for BAT.\n"); + // Hot indices written for cold functions are delta-encoded, starting at zero. + size_t PrevIndex = 0; + // Output addresses are delta-encoded, starting at zero. uint64_t PrevAddress = 0; for (auto &MapEntry : Maps) { const uint64_t Address = MapEntry.first; + // Only process cold fragments in cold mode, and vice versa. 
+ if (Cold != ColdPartSource.count(Address)) + continue; MapTy &Map = MapEntry.second; const uint32_t NumEntries = Map.size(); LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x" << Twine::utohexstr(Address) << ".\n"); encodeULEB128(Address - PrevAddress, OS); PrevAddress = Address; + if (Cold) { + // parseMaps() resolves HotIndex against the hot functions table, so encode + // the parent's position in that table: the number of map entries below the + // parent minus the cold fragments among them. Assumes every ColdPartSource + // key below the parent also has an entry in Maps - TODO confirm. + const uint64_t HotAddress = ColdPartSource.find(Address)->second; + const size_t HotIndex = + std::distance(Maps.begin(), Maps.find(HotAddress)) - + std::distance(ColdPartSource.begin(), + ColdPartSource.lower_bound(HotAddress)); + encodeULEB128(HotIndex - PrevIndex, OS); + PrevIndex = HotIndex; + } encodeULEB128(NumEntries, OS); uint64_t InOffset = 0, OutOffset = 0; // Output and Input addresses and delta-encoded @@ -124,20 +147,6 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) { std::tie(OutOffset, InOffset) = KeyVal; } } - const uint32_t NumColdEntries = ColdPartSource.size(); - LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries - << " cold part mappings.\n"); - encodeULEB128(NumColdEntries, OS); - for (std::pair &ColdEntry : ColdPartSource) { - encodeULEB128(ColdEntry.first, OS); - encodeULEB128(ColdEntry.second, OS); - LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> " - << Twine::utohexstr(ColdEntry.second) << "\n"); - } - - outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n"; - outs() << "BOLT-INFO: Wrote " << NumColdEntries - << " BAT cold-to-hot entries\n"; } std::error_code BoltAddressTranslation::parse(StringRef Buf) { @@ -160,12 +169,31 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) { return make_error_code(llvm::errc::io_error); Error Err(Error::success()); + std::vector HotFuncs; + parseMaps(HotFuncs, DE, Offset, Err); + parseMaps(HotFuncs, DE, Offset, Err); + outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n"; + return errorToErrorCode(std::move(Err)); +} + +template +void BoltAddressTranslation::parseMaps(std::vector &HotFuncs, + DataExtractor &DE, uint64_t &Offset, + Error &Err) { const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err); - LLVM_DEBUG(dbgs() << "Parsing " << 
NumFunctions << " functions\n"); + LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "") + << " functions\n"); + size_t HotIndex = 0; uint64_t PrevAddress = 0; for (uint32_t I = 0; I < NumFunctions; ++I) { const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err); PrevAddress = Address; + if (Cold) { + HotIndex += DE.getULEB128(&Offset, &Err); + // The index comes straight from the section contents; reject values that do + // not name an already-parsed hot function instead of reading out of bounds. + if (HotIndex >= HotFuncs.size()) { + ErrorAsOutParameter EAO(&Err); + // Keep an earlier extraction error if there is one. + if (!Err) + Err = createStringError(make_error_code(llvm::errc::io_error), + "BAT: cold function refers to a nonexistent hot function"); + return; + } + ColdPartSource.emplace(Address, HotFuncs[HotIndex]); + } else { + HotFuncs.push_back(Address); + } const uint32_t NumEntries = DE.getULEB128(&Offset, &Err); MapTy Map; @@ -178,28 +206,14 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) { OutputOffset += OutputDelta; InputOffset += InputDelta; Map.insert(std::pair(OutputOffset, InputOffset)); - LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputOffset) << " -> " - << Twine::utohexstr(InputOffset) << " (" << OutputDelta - << ", " << InputDelta << ")\n"); + LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b)\n", + OutputOffset, InputOffset, OutputDelta, + encodeULEB128(OutputDelta, nulls()), + InputDelta, + encodeSLEB128(InputDelta, nulls()))); } Maps.insert(std::pair(Address, Map)); } - - const uint32_t NumColdEntries = DE.getULEB128(&Offset, &Err); - LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n"); - for (uint32_t I = 0; I < NumColdEntries; ++I) { - const uint32_t ColdAddress = DE.getULEB128(&Offset, &Err); - const uint32_t HotAddress = DE.getULEB128(&Offset, &Err); - ColdPartSource.insert( - std::pair(ColdAddress, HotAddress)); - LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> " - << Twine::utohexstr(HotAddress) << "\n"); - } - outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n"; - outs() << "BOLT-INFO: Parsed " << NumColdEntries - << " BAT cold-to-hot entries\n"; - - return errorToErrorCode(std::move(Err)); } void BoltAddressTranslation::dump(raw_ostream &OS) { diff --git a/bolt/test/X86/bolt-address-translation.test b/bolt/test/X86/bolt-address-translation.test 
index a232f785b9cdb..430b4cb007310 100644 --- a/bolt/test/X86/bolt-address-translation.test +++ b/bolt/test/X86/bolt-address-translation.test @@ -36,8 +36,7 @@ # # CHECK: BOLT: 3 out of 7 functions were overwritten. # CHECK: BOLT-INFO: Wrote 6 BAT maps -# CHECK: BOLT-INFO: Wrote 3 BAT cold-to-hot entries -# CHECK: BOLT-INFO: BAT section size (bytes): 428 +# CHECK: BOLT-INFO: BAT section size (bytes): 404 # # usqrt mappings (hot part). We match against any key (left side containing # the bolted binary offsets) because BOLT may change where it puts instructions