Skip to content

Commit a566ab0

Browse files
committed
[CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds
1 parent c1a0219 commit a566ab0

File tree

7 files changed

+302
-6
lines changed

7 files changed

+302
-6
lines changed

llvm/include/llvm/CGData/CodeGenData.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
164164
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
165165
}
166166

167+
/// Initialize the two-codegen rounds.
168+
void initializeTwoCodegenRounds();
169+
170+
/// Save the current module before the first codegen round.
171+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);
172+
173+
/// Load the optimized module previously saved by saveModuleForTwoRounds() for
/// the given task, for use in the second codegen round.
174+
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
175+
unsigned Task,
176+
LLVMContext &Context);
177+
178+
/// Merge the codegen data from the input files in scratch vector in ThinLTO
179+
/// two-codegen rounds.
180+
Error mergeCodeGenData(
181+
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles);
182+
167183
void warn(Error E, StringRef Whence = "");
168184
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
169185

llvm/lib/CGData/CodeGenData.cpp

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/Object/ObjectFile.h"
1818
#include "llvm/Support/CommandLine.h"
1919
#include "llvm/Support/FileSystem.h"
20+
#include "llvm/Support/Path.h"
2021
#include "llvm/Support/WithColor.h"
2122

2223
#define DEBUG_TYPE "cg-data"
@@ -30,6 +31,14 @@ cl::opt<bool>
3031
cl::opt<std::string>
3132
CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
3233
cl::desc("File path to where .cgdata file is read"));
34+
cl::opt<bool> CodeGenDataThinLTOTwoRounds(
35+
"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden,
36+
cl::desc("Enable two-round ThinLTO code generation. The first round "
37+
"emits codegen data, while the second round uses the emitted "
38+
"codegen data for further optimizations."));
39+
40+
// Path to where the optimized bitcodes are saved and restored for ThinLTO.
41+
static SmallString<128> CodeGenDataThinLTOTwoRoundsPath;
3342

3443
static std::string getCGDataErrString(cgdata_error Err,
3544
const std::string &ErrMsg = "") {
@@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() {
139148
std::call_once(CodeGenData::OnceFlag, []() {
140149
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
141150

142-
if (CodeGenDataGenerate)
151+
if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds)
143152
Instance->EmitCGData = true;
144153
else if (!CodeGenDataUsePath.empty()) {
145154
// Initialize the global CGData if the input file name is given.
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) {
215224
}
216225
}
217226

227+
/// Build the path of the bitcode file that holds the saved module for \p Task
/// inside the directory \p Dir.
///
/// \param Dir directory that contains the saved bitcode files.
/// \param Task task index; it becomes the stem of the file name.
/// \returns "<Dir><sep><Task>.saved_copy.bc".
static std::string getPath(StringRef Dir, unsigned Task) {
  // Join with sys::path::append instead of a hard-coded "/" so the host's
  // native separator is used and a separator already ending Dir is not
  // duplicated.
  llvm::SmallString<128> Path(Dir);
  llvm::sys::path::append(Path, llvm::Twine(Task) + ".saved_copy.bc");
  return std::string(Path);
}
230+
231+
void initializeTwoCodegenRounds() {
232+
assert(CodeGenDataThinLTOTwoRounds);
233+
if (auto EC = llvm::sys::fs::createUniqueDirectory(
234+
"cgdata", CodeGenDataThinLTOTwoRoundsPath))
235+
report_fatal_error(Twine("Failed to create directory: ") + EC.message());
236+
}
237+
238+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
239+
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
240+
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
241+
std::error_code EC;
242+
raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
243+
if (EC)
244+
report_fatal_error(Twine("Failed to open ") + Path +
245+
" to save optimized bitcode: " + EC.message());
246+
WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true);
247+
}
248+
249+
/// Read back the bitcode file that saveModuleForTwoRounds() wrote for \p Task
/// and parse it into \p Context.
///
/// \param OrigModule original bitcode module; its identifier is assigned to
///        the returned module.
/// \param Task task index selecting which saved file to read.
/// \param Context LLVMContext in which the module is materialized.
/// \returns the parsed module. Failure to read or parse the file is fatal.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
                                               unsigned Task,
                                               LLVMContext &Context) {
  assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
  std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
  auto FileOrError = MemoryBuffer::getFile(Path);
  if (auto EC = FileOrError.getError())
    report_fatal_error(Twine("Failed to open ") + Path +
                       " to load optimized bitcode: " + EC.message());

  std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
  auto RestoredModule = llvm::parseBitcodeFile(*FileBuffer, Context);
  if (!RestoredModule)
    // Take the error out of the Expected so it is consumed, and include its
    // text so the fatal message says why parsing failed.
    report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
                       Path + ": " + toString(RestoredModule.takeError()));

  // The module was parsed from the saved copy; give it back the identifier of
  // the original module.
  (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
  return std::move(*RestoredModule);
}
269+
270+
/// Merge the codegen data found in a set of in-memory object files and, if
/// anything was collected, publish the resulting outlined hash tree.
///
/// \param InputFiles buffers holding object-file contents; empty buffers are
///        skipped.
/// \returns the first error hit while opening an object file or merging its
///          codegen data, otherwise success.
Error mergeCodeGenData(
    const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles) {
  OutlinedHashTreeRecord MergedRecord;

  for (auto &ObjectBytes : *InputFiles) {
    // A buffer with no contents has nothing to contribute.
    if (ObjectBytes.empty())
      continue;

    // Wrap the bytes without copying; they are not null-terminated.
    StringRef Contents(ObjectBytes.data(), ObjectBytes.size());
    std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
        Contents, "in-memory object file", /*RequiresNullTerminator=*/false);

    Expected<std::unique_ptr<object::ObjectFile>> ObjOrErr =
        object::ObjectFile::createObjectFile(Buffer->getMemBufferRef());
    if (!ObjOrErr)
      return ObjOrErr.takeError();

    if (auto MergeErr = CodeGenDataReader::mergeFromObjectFile(
            ObjOrErr->get(), MergedRecord))
      return MergeErr;
  }

  // Only publish when at least one input carried outlining data.
  if (!MergedRecord.empty())
    cgdata::publishOutlinedHashTree(std::move(MergedRecord.HashTree));

  return Error::success();
}
296+
218297
} // end namespace cgdata
219298

220299
} // end namespace llvm

llvm/lib/LTO/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMLTO
2121
BinaryFormat
2222
BitReader
2323
BitWriter
24+
CGData
2425
CodeGen
2526
CodeGenTypes
2627
Core

llvm/lib/LTO/LTO.cpp

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Analysis/TargetTransformInfo.h"
2222
#include "llvm/Bitcode/BitcodeReader.h"
2323
#include "llvm/Bitcode/BitcodeWriter.h"
24+
#include "llvm/CGData/CodeGenData.h"
2425
#include "llvm/CodeGen/Analysis.h"
2526
#include "llvm/Config/llvm-config.h"
2627
#include "llvm/IR/AutoUpgrade.h"
@@ -70,6 +71,8 @@ static cl::opt<bool>
7071
DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
7172
cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
7273

74+
extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
75+
7376
namespace llvm {
7477
/// Enable global value internalization in LTO.
7578
cl::opt<bool> EnableLTOInternalization(
@@ -1458,7 +1461,7 @@ class InProcessThinBackend : public ThinBackendProc {
14581461
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
14591462
}
14601463

1461-
Error runThinLTOBackendThread(
1464+
virtual Error runThinLTOBackendThread(
14621465
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
14631466
ModuleSummaryIndex &CombinedIndex,
14641467
const FunctionImporter::ImportMapTy &ImportList,
@@ -1559,6 +1562,60 @@ class InProcessThinBackend : public ThinBackendProc {
15591562
return BackendThreadPool.getMaxConcurrency();
15601563
}
15611564
};
1565+
1566+
/// This Backend will run ThinBackend process but throw away all the output from
1567+
/// the codegen. This class facilitates the first codegen round.
1568+
class NoOutputThinBackend : public InProcessThinBackend {
1569+
public:
1570+
NoOutputThinBackend(
1571+
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1572+
ThreadPoolStrategy ThinLTOParallelism,
1573+
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1574+
std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch)
1575+
: InProcessThinBackend(
1576+
Conf, CombinedIndex, ThinLTOParallelism, ModuleToDefinedGVSummaries,
1577+
// Allocate a scratch buffer for each task to write output to.
1578+
[Allocation = &*Scratch](unsigned Task, const Twine &ModuleName) {
1579+
return std::make_unique<CachedFileStream>(
1580+
std::make_unique<raw_svector_ostream>((*Allocation)[Task]));
1581+
},
1582+
FileCache(), nullptr, false, false),
1583+
Scratch(std::move(Scratch)) {}
1584+
1585+
/// Scratch space for writing output during the codegen.
1586+
std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch;
1587+
};
1588+
1589+
/// This Backend performs codegen on bitcode that was previously saved after
1590+
/// going through optimization. This class facilitates the second codegen round.
1591+
class OptimizedBitcodeThinBackend : public InProcessThinBackend {
1592+
public:
1593+
OptimizedBitcodeThinBackend(
1594+
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1595+
ThreadPoolStrategy ThinLTOParallelism,
1596+
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1597+
AddStreamFn AddStream)
1598+
: InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1599+
ModuleToDefinedGVSummaries, AddStream, FileCache(),
1600+
nullptr, false, false) {}
1601+
1602+
virtual Error runThinLTOBackendThread(
1603+
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1604+
ModuleSummaryIndex &CombinedIndex,
1605+
const FunctionImporter::ImportMapTy &ImportList,
1606+
const FunctionImporter::ExportSetTy &ExportList,
1607+
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1608+
const GVSummaryMapTy &DefinedGlobals,
1609+
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1610+
LTOLLVMContext BackendContext(Conf);
1611+
std::unique_ptr<Module> LoadedModule =
1612+
cgdata::loadModuleForTwoRounds(BM, Task, BackendContext);
1613+
1614+
return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex,
1615+
ImportList, DefinedGlobals, &ModuleMap,
1616+
/*CodeGenOnly=*/true);
1617+
}
1618+
};
15621619
} // end anonymous namespace
15631620

15641621
ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
@@ -1879,10 +1936,46 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
18791936
return BackendProcess->wait();
18801937
};
18811938

1882-
std::unique_ptr<ThinBackendProc> BackendProc =
1883-
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
1884-
AddStream, Cache);
1885-
return RunBackends(BackendProc.get());
1939+
if (!CodeGenDataThinLTOTwoRounds) {
1940+
std::unique_ptr<ThinBackendProc> BackendProc =
1941+
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
1942+
AddStream, Cache);
1943+
return RunBackends(BackendProc.get());
1944+
}
1945+
1946+
// Perform two rounds of code generation for ThinLTO:
1947+
// 1. First round: Run optimization and code generation with a scratch output.
1948+
// 2. Merge codegen data extracted from the scratch output.
1949+
// 3. Second round: Run code generation again using the merged data.
1950+
LLVM_DEBUG(dbgs() << "Running ThinLTO two-codegen rounds\n");
1951+
1952+
// Initialize a temporary path to store and retrieve optimized IRs for
1953+
// two-round code generation.
1954+
cgdata::initializeTwoCodegenRounds();
1955+
1956+
// Create a scratch output to hold intermediate results.
1957+
auto Outputs =
1958+
std::make_unique<std::vector<llvm::SmallString<0>>>(getMaxTasks());
1959+
auto FirstRoundLTO = std::make_unique<NoOutputThinBackend>(
1960+
Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
1961+
ModuleToDefinedGVSummaries, std::move(Outputs));
1962+
// First round: Run optimization and code generation with a scratch output.
1963+
// Before code generation, serialize modules.
1964+
if (Error E = RunBackends(FirstRoundLTO.get()))
1965+
return E;
1966+
1967+
// Merge codegen data extracted from the scratch output.
1968+
if (Error E = cgdata::mergeCodeGenData(std::move(FirstRoundLTO->Scratch)))
1969+
return E;
1970+
1971+
// Second round: Run code generation by reading IRs.
1972+
std::unique_ptr<ThinBackendProc> SecondRoundLTO =
1973+
std::make_unique<OptimizedBitcodeThinBackend>(
1974+
Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
1975+
ModuleToDefinedGVSummaries, AddStream);
1976+
Error E = RunBackends(SecondRoundLTO.get());
1977+
1978+
return E;
18861979
}
18871980

18881981
Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(

llvm/lib/LTO/LTOBackend.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/Analysis/TargetLibraryInfo.h"
2121
#include "llvm/Bitcode/BitcodeReader.h"
2222
#include "llvm/Bitcode/BitcodeWriter.h"
23+
#include "llvm/CGData/CodeGenData.h"
2324
#include "llvm/IR/LLVMRemarkStreamer.h"
2425
#include "llvm/IR/LegacyPassManager.h"
2526
#include "llvm/IR/PassManager.h"
@@ -74,6 +75,8 @@ static cl::opt<bool> ThinLTOAssumeMerged(
7475
cl::desc("Assume the input has already undergone ThinLTO function "
7576
"importing and the other pre-optimization pipeline changes."));
7677

78+
extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
79+
7780
namespace llvm {
7881
extern cl::opt<bool> NoPGOWarnMismatch;
7982
}
@@ -599,11 +602,19 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
599602
auto OptimizeAndCodegen =
600603
[&](Module &Mod, TargetMachine *TM,
601604
std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) {
605+
// Perform optimization and code generation for ThinLTO.
602606
if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
603607
/*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
604608
CmdArgs))
605609
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
606610

611+
// Save the current module before the first codegen round.
612+
// Note that the second codegen round runs only `codegen()` without
613+
// running `opt()`. We're not reaching here as it's bailed out earlier
614+
// with CodeGenOnly which has been set in `OptimizedBitcodeThinBackend`.
615+
if (CodeGenDataThinLTOTwoRounds)
616+
cgdata::saveModuleForTwoRounds(Mod, Task);
617+
607618
codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
608619
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
609620
};

0 commit comments

Comments
 (0)