Skip to content

Commit fa3489a

Browse files
committed
Address comments from teresajohnson
1 parent c5d3379 commit fa3489a

File tree

11 files changed

+452
-102
lines changed

11 files changed

+452
-102
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,10 +1321,11 @@ static void runThinLTOBackend(
13211321
Conf.CGFileType = getCodeGenFileType(Action);
13221322
break;
13231323
}
1324-
if (Error E = thinBackend(
1325-
Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
1326-
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
1327-
/* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) {
1324+
if (Error E =
1325+
thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
1326+
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
1327+
/*ModuleMap=*/nullptr, Conf.CodeGenOnly,
1328+
/*IRAddStream=*/nullptr, CGOpts.CmdArgs)) {
13281329
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
13291330
errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
13301331
});

llvm/include/llvm/CGData/CodeGenData.h

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
#define LLVM_CGDATA_CODEGENDATA_H
1616

1717
#include "llvm/ADT/BitmaskEnum.h"
18+
#include "llvm/ADT/StableHashing.h"
1819
#include "llvm/Bitcode/BitcodeReader.h"
1920
#include "llvm/CGData/OutlinedHashTree.h"
2021
#include "llvm/CGData/OutlinedHashTreeRecord.h"
2122
#include "llvm/IR/Module.h"
2223
#include "llvm/Object/ObjectFile.h"
24+
#include "llvm/Support/Caching.h"
2325
#include "llvm/Support/ErrorHandling.h"
2426
#include "llvm/TargetParser/Triple.h"
2527
#include <mutex>
@@ -164,22 +166,60 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
164166
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
165167
}
166168

167-
void initializeTwoCodegenRounds();
169+
struct StreamCacheData {
170+
/// Backing buffer for serialized data stream.
171+
SmallVector<SmallString<0>> Outputs;
172+
/// Callback function to add serialized data to the stream.
173+
AddStreamFn AddStream;
174+
/// Backing buffer for cached data.
175+
SmallVector<std::unique_ptr<MemoryBuffer>> Files;
176+
/// Cache mechanism for storing and retrieving data.
177+
FileCache Cache;
178+
179+
StreamCacheData(unsigned Size) : Outputs(Size), Files(Size) {}
180+
StreamCacheData() = delete;
181+
182+
/// Retrieve results from either the cache or the stream.
183+
SmallVector<StringRef> getResult() {
184+
unsigned NumOutputs = Outputs.size();
185+
SmallVector<StringRef> Result(NumOutputs);
186+
for (unsigned I = 0; I < NumOutputs; ++I)
187+
if (Files[I])
188+
Result[I] = Files[I]->getBuffer();
189+
else
190+
Result[I] = Outputs[I];
191+
return Result;
192+
}
193+
};
194+
195+
/// Establish additional streams and caches for accessing object and IR files.
196+
/// \p OrigCache refers to the original cache used for accessing the final
197+
/// object files, which has already been configured and provided by the linker,
198+
/// if applicable. This cache will be utilized during the second round of the
199+
/// run. Additionally, we add two more caches at the same location for the first
200+
/// round of the run.
201+
void initializeTwoCodegenRounds(StreamCacheData &CG, StreamCacheData &IR,
202+
const FileCache &OrigCache);
168203

169204
/// Save \p TheModule before the first codegen round.
170205
/// \p Task represents the partition number in the parallel code generation
171206
/// process.
172-
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);
207+
/// \p AddStream is the callback used to add the serialized module to the
208+
/// stream.
209+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
210+
AddStreamFn AddStream);
173211

174212
/// Load the optimized module before the second codegen round.
175213
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
176214
unsigned Task,
177-
LLVMContext &Context);
215+
LLVMContext &Context,
216+
ArrayRef<StringRef> IRFiles);
178217

179218
/// Merge the codegen data from the input files in scratch vector in ThinLTO
180-
/// two-codegen rounds.
181-
Error mergeCodeGenData(
182-
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles);
219+
/// two-codegen rounds. Optionally, \p CombinedHash can be used to compute
220+
/// the combined hash of the merged data.
221+
Error mergeCodeGenData(ArrayRef<StringRef> CGFiles,
222+
stable_hash *CombinedHash = nullptr);
183223

184224
void warn(Error E, StringRef Whence = "");
185225
void warn(Twine Message, std::string Whence = "", std::string Hint = "");

llvm/include/llvm/CGData/CodeGenDataReader.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,11 @@ class CodeGenDataReader {
5454
/// Extract the cgdata embedded in sections from the given object file and
5555
/// merge them into the GlobalOutlineRecord. This is a static helper that
5656
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
57+
/// Optionally, \p CombinedHash can be used to compute the combined hash of
58+
/// the merged data.
5759
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
58-
OutlinedHashTreeRecord &GlobalOutlineRecord);
60+
OutlinedHashTreeRecord &GlobalOutlineRecord,
61+
stable_hash *CombinedHash = nullptr);
5962

6063
protected:
6164
/// The outlined hash tree that has been read. When it's released by

llvm/include/llvm/LTO/LTO.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,17 @@ void thinLTOInternalizeAndPromoteInIndex(
6464
isPrevailing);
6565

6666
/// Computes a unique hash for the Module considering the current list of
67-
/// export/import and other global analysis results.
67+
/// export/import and other global analysis results. Optionally, \p ExtraID
68+
/// can be used to add an extra identifier to the hash.
6869
std::string computeLTOCacheKey(
6970
const lto::Config &Conf, const ModuleSummaryIndex &Index,
7071
StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
7172
const FunctionImporter::ExportSetTy &ExportList,
7273
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
7374
const GVSummaryMapTy &DefinedGlobals,
7475
const DenseSet<GlobalValue::GUID> &CfiFunctionDefs = {},
75-
const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {});
76+
const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {},
77+
StringRef ExtraID = {});
7678

7779
namespace lto {
7880

llvm/include/llvm/LTO/LTOBackend.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,15 @@ Error backend(const Config &C, AddStreamFn AddStream,
5151
/// are saved in the ModuleMap. If \p ModuleMap is nullptr, module files will
5252
/// be mapped to memory on demand and at any given time during importing, only
5353
/// one source module will be kept open at the most. If \p CodeGenOnly is true,
54-
/// the backend will skip optimization and only perform code generation.
54+
/// the backend will skip optimization and only perform code generation. If
55+
/// \p IRAddStream is not nullptr, it will be called just before code generation
56+
/// to serialize the optimized IR.
5557
Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
5658
Module &M, const ModuleSummaryIndex &CombinedIndex,
5759
const FunctionImporter::ImportMapTy &ImportList,
5860
const GVSummaryMapTy &DefinedGlobals,
5961
MapVector<StringRef, BitcodeModule> *ModuleMap,
60-
bool CodeGenOnly,
62+
bool CodeGenOnly, AddStreamFn IRAddStream = nullptr,
6163
const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());
6264

6365
Error finalizeOptimizationRemarks(

llvm/lib/CGData/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ add_llvm_component_library(LLVMCGData
1212
intrinsics_gen
1313

1414
LINK_COMPONENTS
15+
BitReader
16+
BitWriter
1517
Core
1618
Support
1719
Object

llvm/lib/CGData/CodeGenData.cpp

Lines changed: 58 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/CGData/CodeGenDataReader.h"
1616
#include "llvm/CGData/OutlinedHashTreeRecord.h"
1717
#include "llvm/Object/ObjectFile.h"
18+
#include "llvm/Support/Caching.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include "llvm/Support/FileSystem.h"
2021
#include "llvm/Support/Path.h"
@@ -37,9 +38,6 @@ cl::opt<bool> CodeGenDataThinLTOTwoRounds(
3738
"emits codegen data, while the second round uses the emitted "
3839
"codegen data for further optimizations."));
3940

40-
// Path to where the optimized bitcodes are saved and restored for ThinLTO.
41-
static SmallString<128> CodeGenDataThinLTOTwoRoundsPath;
42-
4341
static std::string getCGDataErrString(cgdata_error Err,
4442
const std::string &ErrMsg = "") {
4543
std::string Msg;
@@ -224,59 +222,78 @@ void warn(Error E, StringRef Whence) {
224222
}
225223
}
226224

227-
static std::string getPath(StringRef Dir, unsigned Task) {
228-
llvm::SmallString<128> Path(Dir);
229-
llvm::sys::path::append(Path, llvm::Twine(Task) + ".saved_copy.bc");
230-
return std::string(Path);
231-
}
232-
233-
void initializeTwoCodegenRounds() {
225+
void initializeTwoCodegenRounds(StreamCacheData &CG, StreamCacheData &IR,
226+
const FileCache &OrigCache) {
234227
assert(CodeGenDataThinLTOTwoRounds);
235-
if (auto EC = llvm::sys::fs::createUniqueDirectory(
236-
"cgdata", CodeGenDataThinLTOTwoRoundsPath))
237-
report_fatal_error(Twine("Failed to create directory: ") + EC.message());
228+
CG.AddStream = [&](size_t Task, const Twine &ModuleName) {
229+
return std::make_unique<CachedFileStream>(
230+
std::make_unique<raw_svector_ostream>(CG.Outputs[Task]));
231+
};
232+
IR.AddStream = [&](size_t Task, const Twine &ModuleName) {
233+
return std::make_unique<CachedFileStream>(
234+
std::make_unique<raw_svector_ostream>(IR.Outputs[Task]));
235+
};
236+
237+
if (OrigCache.isValid()) {
238+
auto CGCacheOrErr =
239+
localCache("ThinLTO", "CG", OrigCache.getCacheDirectoryPath(),
240+
[&](size_t Task, const Twine &ModuleName,
241+
std::unique_ptr<MemoryBuffer> MB) {
242+
CG.Files[Task] = std::move(MB);
243+
});
244+
if (Error Err = CGCacheOrErr.takeError())
245+
report_fatal_error(std::move(Err));
246+
CG.Cache = std::move(*CGCacheOrErr);
247+
auto IRCacheOrErr =
248+
localCache("ThinLTO", "IR", OrigCache.getCacheDirectoryPath(),
249+
[&](size_t Task, const Twine &NoduleName,
250+
std::unique_ptr<MemoryBuffer> MB) {
251+
IR.Files[Task] = std::move(MB);
252+
});
253+
if (Error Err = IRCacheOrErr.takeError())
254+
report_fatal_error(std::move(Err));
255+
IR.Cache = std::move(*IRCacheOrErr);
256+
}
238257
}
239258

240-
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
241-
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
242-
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
243-
std::error_code EC;
244-
raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
245-
if (EC)
246-
report_fatal_error(Twine("Failed to open ") + Path +
247-
" to save optimized bitcode: " + EC.message());
248-
WriteBitcodeToFile(TheModule, OS, /*ShouldPreserveUseListOrder=*/true);
259+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
260+
AddStreamFn AddStream) {
261+
LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
262+
<< " in Task " << Task << "\n");
263+
Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
264+
AddStream(Task, TheModule.getModuleIdentifier());
265+
if (Error Err = StreamOrErr.takeError())
266+
report_fatal_error(std::move(Err));
267+
std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
268+
269+
WriteBitcodeToFile(TheModule, *Stream->OS,
270+
/*ShouldPreserveUseListOrder=*/true);
249271
}
250272

251273
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
252274
unsigned Task,
253-
LLVMContext &Context) {
254-
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
255-
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
256-
auto FileOrError = MemoryBuffer::getFile(Path);
257-
if (auto EC = FileOrError.getError())
258-
report_fatal_error(Twine("Failed to open ") + Path +
259-
" to load optimized bitcode: " + EC.message());
260-
261-
std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
275+
LLVMContext &Context,
276+
ArrayRef<StringRef> IRFiles) {
277+
LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
278+
<< " in Task " << Task << "\n");
279+
std::unique_ptr<MemoryBuffer> FileBuffer = MemoryBuffer::getMemBuffer(
280+
IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false);
262281
auto RestoredModule = parseBitcodeFile(*FileBuffer, Context);
263282
if (!RestoredModule)
264-
report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
265-
Path + "\n");
283+
report_fatal_error(
284+
Twine("Failed to parse optimized bitcode loaded for Task: ") +
285+
Twine(Task) + "\n");
266286

267287
// Restore the original module identifier.
268288
(*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
269289
return std::move(*RestoredModule);
270290
}
271291

272-
Error mergeCodeGenData(
273-
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles) {
274-
292+
Error mergeCodeGenData(ArrayRef<StringRef> CGFiles, stable_hash *CombinedHash) {
275293
OutlinedHashTreeRecord GlobalOutlineRecord;
276-
for (auto &InputFile : *(InputFiles)) {
277-
if (InputFile.empty())
294+
for (auto File : CGFiles) {
295+
if (File.empty())
278296
continue;
279-
StringRef File = StringRef(InputFile.data(), InputFile.size());
280297
std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
281298
File, "in-memory object file", /*RequiresNullTerminator=*/false);
282299
Expected<std::unique_ptr<object::ObjectFile>> BinOrErr =
@@ -285,8 +302,8 @@ Error mergeCodeGenData(
285302
return BinOrErr.takeError();
286303

287304
std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
288-
if (auto E = CodeGenDataReader::mergeFromObjectFile(Obj.get(),
289-
GlobalOutlineRecord))
305+
if (auto E = CodeGenDataReader::mergeFromObjectFile(
306+
Obj.get(), GlobalOutlineRecord, CombinedHash))
290307
return E;
291308
}
292309

llvm/lib/CGData/CodeGenDataReader.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
3131
}
3232

3333
Error CodeGenDataReader::mergeFromObjectFile(
34-
const object::ObjectFile *Obj,
35-
OutlinedHashTreeRecord &GlobalOutlineRecord) {
34+
const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
35+
stable_hash *CombinedHash) {
3636
Triple TT = Obj->makeTriple();
3737
auto CGOutLineName =
3838
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
@@ -48,6 +48,9 @@ Error CodeGenDataReader::mergeFromObjectFile(
4848
auto *EndData = Data + ContentsOrErr->size();
4949

5050
if (*NameOrErr == CGOutLineName) {
51+
if (CombinedHash)
52+
*CombinedHash =
53+
stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
5154
// In case dealing with an executable that has concatenated cgdata,
5255
// we want to merge them into a single cgdata.
5356
// Although it's not a typical workflow, we support this scenario.

0 commit comments

Comments
 (0)