Skip to content

[CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds #90933

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1321,10 +1321,11 @@ static void runThinLTOBackend(
Conf.CGFileType = getCodeGenFileType(Action);
break;
}
if (Error E = thinBackend(
Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
/* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) {
if (Error E =
thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
/*ModuleMap=*/nullptr, Conf.CodeGenOnly,
/*IRAddStream=*/nullptr, CGOpts.CmdArgs)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
});
Expand Down
70 changes: 70 additions & 0 deletions llvm/include/llvm/CGData/CodeGenData.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@
#define LLVM_CGDATA_CODEGENDATA_H

#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CGData/OutlinedHashTree.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include <mutex>
Expand Down Expand Up @@ -164,6 +166,74 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}

struct StreamCacheData {
/// Backing buffer for serialized data stream.
SmallVector<SmallString<0>> Outputs;
/// Callback function to add serialized data to the stream.
AddStreamFn AddStream;
/// Backing buffer for cached data.
SmallVector<std::unique_ptr<MemoryBuffer>> Files;
/// Cache mechanism for storing data.
FileCache Cache;

StreamCacheData(unsigned Size, const FileCache &OrigCache,
const Twine &CachePrefix)
: Outputs(Size), Files(Size) {
AddStream = [&](size_t Task, const Twine &ModuleName) {
return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>(Outputs[Task]));
};

if (OrigCache.isValid()) {
auto CGCacheOrErr =
localCache("ThinLTO", CachePrefix, OrigCache.getCacheDirectoryPath(),
[&](size_t Task, const Twine &ModuleName,
std::unique_ptr<MemoryBuffer> MB) {
Files[Task] = std::move(MB);
});
if (Error Err = CGCacheOrErr.takeError())
report_fatal_error(std::move(Err));
Cache = std::move(*CGCacheOrErr);
}
}
StreamCacheData() = delete;

/// Retrieve results from either the cache or the stream.
std::unique_ptr<SmallVector<StringRef>> getResult() {
unsigned NumOutputs = Outputs.size();
auto Result = std::make_unique<SmallVector<StringRef>>(NumOutputs);
for (unsigned I = 0; I < NumOutputs; ++I)
if (Files[I])
(*Result)[I] = Files[I]->getBuffer();
else
(*Result)[I] = Outputs[I];
return Result;
}
};

/// Save \p TheModule before the first codegen round.
/// \p Task represents the partition number in the parallel code generation
/// process. \p AddStream is the callback used to add the serialized module to
/// the stream.
/// The module is written as bitcode with its use-list order preserved. If
/// \p AddStream fails to provide a stream, the error is reported as fatal.
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
AddStreamFn AddStream);

/// Load the optimized bitcode module for the second codegen round.
/// \p OrigModule is the original bitcode module; its module identifier is
/// restored on the loaded module.
/// \p Task identifies the partition number in the parallel code generation
/// process and selects the entry of \p IRFiles to parse. \p Context is the
/// LLVMContext the module is parsed into.
/// \p IRFiles contains optimized bitcode module files needed for loading.
/// \return A unique_ptr to the loaded Module. A failure to parse the bitcode
/// is reported through report_fatal_error rather than by returning nullptr.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
unsigned Task,
LLVMContext &Context,
ArrayRef<StringRef> IRFiles);

/// Merge the codegen data from the scratch objects \p ObjectFiles from the
/// first codegen round. Empty entries are skipped. If the merged outlined
/// hash tree record is not empty, its hash tree is published via
/// publishOutlinedHashTree.
/// \return the combined hash of the merged codegen data, or an Error if an
/// entry cannot be opened as an object file or its codegen data cannot be
/// merged.
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjectFiles);

void warn(Error E, StringRef Whence = "");
void warn(Twine Message, std::string Whence = "", std::string Hint = "");

Expand Down
5 changes: 4 additions & 1 deletion llvm/include/llvm/CGData/CodeGenDataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,11 @@ class CodeGenDataReader {
/// Extract the cgdata embedded in sections from the given object file and
/// merge them into the GlobalOutlineRecord. This is a static helper that
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
/// Optionally, \p CombinedHash can be used to compute the combined hash of
/// the merged data.
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord);
OutlinedHashTreeRecord &GlobalOutlineRecord,
stable_hash *CombinedHash = nullptr);

protected:
/// The outlined hash tree that has been read. When it's released by
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/LTO/LTO.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ std::string computeLTOCacheKey(
const DenseSet<GlobalValue::GUID> &CfiFunctionDefs = {},
const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {});

/// Recomputes the LTO cache key for a given key with an extra identifier.
std::string recomputeLTOCacheKey(const std::string &Key, StringRef ExtraID);

namespace lto {

StringLiteral getThinLTODefaultCPU(const Triple &TheTriple);
Expand Down
6 changes: 4 additions & 2 deletions llvm/include/llvm/LTO/LTOBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ Error backend(const Config &C, AddStreamFn AddStream,
/// are saved in the ModuleMap. If \p ModuleMap is nullptr, module files will
/// be mapped to memory on demand and at any given time during importing, only
/// one source module will be kept open at the most. If \p CodeGenOnly is true,
/// the backend will skip optimization and only perform code generation.
/// the backend will skip optimization and only perform code generation. If
/// \p IRAddStream is not nullptr, it will be called just before code generation
/// to serialize the optimized IR.
Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
Module &M, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> *ModuleMap,
bool CodeGenOnly,
bool CodeGenOnly, AddStreamFn IRAddStream = nullptr,
const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());

Error finalizeOptimizationRemarks(
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CGData/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ add_llvm_component_library(LLVMCGData
intrinsics_gen

LINK_COMPONENTS
BitReader
BitWriter
Core
Support
Object
Expand Down
67 changes: 66 additions & 1 deletion llvm/lib/CGData/CodeGenData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"

#define DEBUG_TYPE "cg-data"
Expand All @@ -30,6 +32,11 @@ cl::opt<bool>
cl::opt<std::string>
CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
cl::desc("File path to where .cgdata file is read"));
// Hidden, off-by-default switch for two-round ThinLTO code generation.
// When set, CodeGenData::getInstance() turns on codegen data emission
// (EmitCGData), just as it does for CodeGenDataGenerate.
cl::opt<bool> CodeGenDataThinLTOTwoRounds(
"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden,
cl::desc("Enable two-round ThinLTO code generation. The first round "
"emits codegen data, while the second round uses the emitted "
"codegen data for further optimizations."));

static std::string getCGDataErrString(cgdata_error Err,
const std::string &ErrMsg = "") {
Expand Down Expand Up @@ -139,7 +146,7 @@ CodeGenData &CodeGenData::getInstance() {
std::call_once(CodeGenData::OnceFlag, []() {
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());

if (CodeGenDataGenerate)
if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds)
Instance->EmitCGData = true;
else if (!CodeGenDataUsePath.empty()) {
// Initialize the global CGData if the input file name is given.
Expand Down Expand Up @@ -215,6 +222,64 @@ void warn(Error E, StringRef Whence) {
}
}

void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
AddStreamFn AddStream) {
LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
<< " in Task " << Task << "\n");
Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
AddStream(Task, TheModule.getModuleIdentifier());
if (Error Err = StreamOrErr.takeError())
report_fatal_error(std::move(Err));
std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;

WriteBitcodeToFile(TheModule, *Stream->OS,
/*ShouldPreserveUseListOrder=*/true);
}

/// Parse the bitcode held in \p IRFiles[Task] into \p Context and give the
/// resulting module the identifier of \p OrigModule.
/// A parse failure is reported as a fatal error, so the function only returns
/// on success.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
                                               unsigned Task,
                                               LLVMContext &Context,
                                               ArrayRef<StringRef> IRFiles) {
  LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
                    << " in Task " << Task << "\n");

  // Wrap this task's serialized IR in a MemoryBuffer for the bitcode parser.
  std::unique_ptr<MemoryBuffer> IRBuffer = MemoryBuffer::getMemBuffer(
      IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false);

  Expected<std::unique_ptr<Module>> ModuleOrErr =
      parseBitcodeFile(*IRBuffer, Context);
  if (!ModuleOrErr)
    report_fatal_error(
        Twine("Failed to parse optimized bitcode loaded for Task: ") +
        Twine(Task) + "\n");

  // Put back the identifier of the original module before handing it out.
  std::unique_ptr<Module> Restored = std::move(*ModuleOrErr);
  Restored->setModuleIdentifier(OrigModule.getModuleIdentifier());
  return Restored;
}

/// Merge the codegen data embedded in each in-memory object of \p ObjFiles
/// and, if anything was collected, publish the resulting outlined hash tree.
/// Empty entries are skipped.
/// \return the combined hash accumulated while merging, or the first Error
/// hit while opening an object or merging its data.
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
  OutlinedHashTreeRecord MergedRecord;
  stable_hash Hash = 0;

  for (StringRef Contents : ObjFiles) {
    if (Contents.empty())
      continue;

    auto Buffer = MemoryBuffer::getMemBuffer(
        Contents, "in-memory object file", /*RequiresNullTerminator=*/false);
    auto ObjOrErr =
        object::ObjectFile::createObjectFile(Buffer->getMemBufferRef());
    if (!ObjOrErr)
      return ObjOrErr.takeError();

    // Fold this object's codegen data into the record and the running hash.
    if (Error Err = CodeGenDataReader::mergeFromObjectFile(
            ObjOrErr->get(), MergedRecord, &Hash))
      return Err;
  }

  // Only publish when at least one object contributed data.
  if (!MergedRecord.empty())
    cgdata::publishOutlinedHashTree(std::move(MergedRecord.HashTree));

  return Hash;
}

} // end namespace cgdata

} // end namespace llvm
7 changes: 5 additions & 2 deletions llvm/lib/CGData/CodeGenDataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
}

Error CodeGenDataReader::mergeFromObjectFile(
const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord) {
const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
stable_hash *CombinedHash) {
Triple TT = Obj->makeTriple();
auto CGOutLineName =
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
Expand All @@ -48,6 +48,9 @@ Error CodeGenDataReader::mergeFromObjectFile(
auto *EndData = Data + ContentsOrErr->size();

if (*NameOrErr == CGOutLineName) {
if (CombinedHash)
*CombinedHash =
stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
// In case dealing with an executable that has concatenated cgdata,
// we want to merge them into a single cgdata.
// Although it's not a typical workflow, we support this scenario.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/LTO/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_llvm_component_library(LLVMLTO
BinaryFormat
BitReader
BitWriter
CGData
CodeGen
CodeGenTypes
Core
Expand Down
Loading
Loading