|
21 | 21 | #include "llvm/Analysis/TargetTransformInfo.h"
|
22 | 22 | #include "llvm/Bitcode/BitcodeReader.h"
|
23 | 23 | #include "llvm/Bitcode/BitcodeWriter.h"
|
| 24 | +#include "llvm/CGData/CodeGenData.h" |
24 | 25 | #include "llvm/CodeGen/Analysis.h"
|
25 | 26 | #include "llvm/Config/llvm-config.h"
|
26 | 27 | #include "llvm/IR/AutoUpgrade.h"
|
@@ -70,6 +71,8 @@ static cl::opt<bool>
|
70 | 71 | DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
|
71 | 72 | cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
|
72 | 73 |
|
| 74 | +extern cl::opt<bool> CodeGenDataThinLTOTwoRounds; |
| 75 | + |
73 | 76 | namespace llvm {
|
74 | 77 | /// Enable global value internalization in LTO.
|
75 | 78 | cl::opt<bool> EnableLTOInternalization(
|
@@ -1458,7 +1461,7 @@ class InProcessThinBackend : public ThinBackendProc {
|
1458 | 1461 | GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
|
1459 | 1462 | }
|
1460 | 1463 |
|
1461 |
| - Error runThinLTOBackendThread( |
| 1464 | + virtual Error runThinLTOBackendThread( |
1462 | 1465 | AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
|
1463 | 1466 | ModuleSummaryIndex &CombinedIndex,
|
1464 | 1467 | const FunctionImporter::ImportMapTy &ImportList,
|
@@ -1559,6 +1562,60 @@ class InProcessThinBackend : public ThinBackendProc {
|
1559 | 1562 | return BackendThreadPool.getMaxConcurrency();
|
1560 | 1563 | }
|
1561 | 1564 | };
|
| 1565 | + |
| 1566 | +/// This Backend will run ThinBackend process but throw away all the output from |
| 1567 | +/// the codegen. This class facilitates the first codegen round. |
| 1568 | +class NoOutputThinBackend : public InProcessThinBackend { |
| 1569 | +public: |
| 1570 | + NoOutputThinBackend( |
| 1571 | + const Config &Conf, ModuleSummaryIndex &CombinedIndex, |
| 1572 | + ThreadPoolStrategy ThinLTOParallelism, |
| 1573 | + const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, |
| 1574 | + std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch) |
| 1575 | + : InProcessThinBackend( |
| 1576 | + Conf, CombinedIndex, ThinLTOParallelism, ModuleToDefinedGVSummaries, |
| 1577 | + // Allocate a scratch buffer for each task to write output to. |
| 1578 | + [Allocation = &*Scratch](unsigned Task, const Twine &ModuleName) { |
| 1579 | + return std::make_unique<CachedFileStream>( |
| 1580 | + std::make_unique<raw_svector_ostream>((*Allocation)[Task])); |
| 1581 | + }, |
| 1582 | + FileCache(), nullptr, false, false), |
| 1583 | + Scratch(std::move(Scratch)) {} |
| 1584 | + |
| 1585 | + /// Scratch space for writing output during the codegen. |
| 1586 | + std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch; |
| 1587 | +}; |
| 1588 | + |
| 1589 | +/// This Backend performs codegen on bitcode that was previously saved after |
| 1590 | +/// going through optimization. This class facilitates the second codegen round. |
| 1591 | +class OptimizedBitcodeThinBackend : public InProcessThinBackend { |
| 1592 | +public: |
| 1593 | + OptimizedBitcodeThinBackend( |
| 1594 | + const Config &Conf, ModuleSummaryIndex &CombinedIndex, |
| 1595 | + ThreadPoolStrategy ThinLTOParallelism, |
| 1596 | + const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, |
| 1597 | + AddStreamFn AddStream) |
| 1598 | + : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism, |
| 1599 | + ModuleToDefinedGVSummaries, AddStream, FileCache(), |
| 1600 | + nullptr, false, false) {} |
| 1601 | + |
| 1602 | + virtual Error runThinLTOBackendThread( |
| 1603 | + AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM, |
| 1604 | + ModuleSummaryIndex &CombinedIndex, |
| 1605 | + const FunctionImporter::ImportMapTy &ImportList, |
| 1606 | + const FunctionImporter::ExportSetTy &ExportList, |
| 1607 | + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, |
| 1608 | + const GVSummaryMapTy &DefinedGlobals, |
| 1609 | + MapVector<StringRef, BitcodeModule> &ModuleMap) override { |
| 1610 | + LTOLLVMContext BackendContext(Conf); |
| 1611 | + std::unique_ptr<Module> LoadedModule = |
| 1612 | + cgdata::loadModuleForTwoRounds(BM, Task, BackendContext); |
| 1613 | + |
| 1614 | + return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex, |
| 1615 | + ImportList, DefinedGlobals, &ModuleMap, |
| 1616 | + /*CodeGenOnly=*/true); |
| 1617 | + } |
| 1618 | +}; |
1562 | 1619 | } // end anonymous namespace
|
1563 | 1620 |
|
1564 | 1621 | ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
|
@@ -1879,10 +1936,46 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
|
1879 | 1936 | return BackendProcess->wait();
|
1880 | 1937 | };
|
1881 | 1938 |
|
1882 |
| - std::unique_ptr<ThinBackendProc> BackendProc = |
1883 |
| - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, |
1884 |
| - AddStream, Cache); |
1885 |
| - return RunBackends(BackendProc.get()); |
| 1939 | + if (!CodeGenDataThinLTOTwoRounds) { |
| 1940 | + std::unique_ptr<ThinBackendProc> BackendProc = |
| 1941 | + ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, |
| 1942 | + AddStream, Cache); |
| 1943 | + return RunBackends(BackendProc.get()); |
| 1944 | + } |
| 1945 | + |
| 1946 | + // Perform two rounds of code generation for ThinLTO: |
| 1947 | + // 1. First round: Run optimization and code generation with a scratch output. |
| 1948 | + // 2. Merge codegen data extracted from the scratch output. |
| 1949 | + // 3. Second round: Run code generation again using the merged data. |
| 1950 | + LLVM_DEBUG(dbgs() << "Running ThinLTO two-codegen rounds\n"); |
| 1951 | + |
| 1952 | + // Initialize a temporary path to store and retrieve optimized IRs for |
| 1953 | + // two-round code generation. |
| 1954 | + cgdata::initializeTwoCodegenRounds(); |
| 1955 | + |
| 1956 | + // Create a scratch output to hold intermediate results. |
| 1957 | + auto Outputs = |
| 1958 | + std::make_unique<std::vector<llvm::SmallString<0>>>(getMaxTasks()); |
| 1959 | + auto FirstRoundLTO = std::make_unique<NoOutputThinBackend>( |
| 1960 | + Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(), |
| 1961 | + ModuleToDefinedGVSummaries, std::move(Outputs)); |
| 1962 | + // First round: Run optimization and code generation with a scratch output. |
| 1963 | + // Before code generation, serialize modules. |
| 1964 | + if (Error E = RunBackends(FirstRoundLTO.get())) |
| 1965 | + return E; |
| 1966 | + |
| 1967 | + // Merge codegen data extracted from the scratch output. |
| 1968 | + if (Error E = cgdata::mergeCodeGenData(std::move(FirstRoundLTO->Scratch))) |
| 1969 | + return E; |
| 1970 | + |
| 1971 | + // Second round: Run code generation by reading IRs. |
| 1972 | + std::unique_ptr<ThinBackendProc> SecondRoundLTO = |
| 1973 | + std::make_unique<OptimizedBitcodeThinBackend>( |
| 1974 | + Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(), |
| 1975 | + ModuleToDefinedGVSummaries, AddStream); |
| 1976 | + Error E = RunBackends(SecondRoundLTO.get()); |
| 1977 | + |
| 1978 | + return E; |
1886 | 1979 | }
|
1887 | 1980 |
|
1888 | 1981 | Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
|
|
0 commit comments