Skip to content

[CGData] LLD for MachO #90166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lld/MachO/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ struct Configuration {
std::vector<SectionAlign> sectionAlignments;
std::vector<SegmentProtection> segmentProtections;
bool ltoDebugPassManager = false;
llvm::StringRef codegenDataGeneratePath;
bool csProfileGenerate = false;
llvm::StringRef csProfilePath;
bool pgoWarnMismatch;
Expand Down
38 changes: 38 additions & 0 deletions lld/MachO/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/CGData/CodeGenDataWriter.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
Expand Down Expand Up @@ -1322,6 +1323,37 @@ static void gatherInputSections() {
}
}

/// Merges the codegen data embedded in the input sections and writes the
/// result to config->codegenDataGeneratePath.
///
/// Every input section whose segment is segment_names::data and whose name is
/// section_names::outlinedHashTree is deserialized as an outlined hash tree
/// and merged into one global record. The merged record is handed to the
/// writer only when it is non-empty; the output file is created either way.
///
/// Failure to create or to write the output file is reported via error().
/// The caller must ensure config->codegenDataGeneratePath is non-empty.
static void codegenDataGenerate() {
  TimeTraceScope timeScope("Generating codegen data");

  OutlinedHashTreeRecord globalOutlineRecord;
  for (ConcatInputSection *isec : inputSections) {
    if (isec->getSegName() != segment_names::data ||
        isec->getName() != section_names::outlinedHashTree)
      continue;

    // Read outlined hash tree from this section.
    OutlinedHashTreeRecord localOutlineRecord;
    // NOTE(review): the pointer is kept in a named local, presumably because
    // deserialize() takes it by reference and advances it -- confirm against
    // OutlinedHashTreeRecord's declaration.
    auto *data = isec->data.data();
    localOutlineRecord.deserialize(data);

    // Merge it to the global hash tree.
    globalOutlineRecord.merge(localOutlineRecord);
  }

  CodeGenDataWriter Writer;
  if (!globalOutlineRecord.empty())
    Writer.addRecord(globalOutlineRecord);

  std::error_code EC;
  auto fileName = config->codegenDataGeneratePath;
  assert(!fileName.empty());
  raw_fd_ostream Output(fileName, EC, sys::fs::OF_None);
  if (EC) {
    error("fail to create " + fileName + ": " + EC.message());
    // The stream could not be opened; writing through it would operate on an
    // invalid file, so stop here instead of falling through to the write.
    return;
  }

  if (auto E = Writer.write(Output))
    error("fail to write CGData: " + toString(std::move(E)));
}

static void foldIdenticalLiterals() {
TimeTraceScope timeScope("Fold identical literals");
// We always create a cStringSection, regardless of whether dedupLiterals is
Expand Down Expand Up @@ -1759,6 +1791,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->ignoreAutoLinkOptions.insert(arg->getValue());
config->strictAutoLink = args.hasArg(OPT_strict_auto_link);
config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
config->codegenDataGeneratePath =
args.getLastArgValue(OPT_codegen_data_generate_path);
config->csProfileGenerate = args.hasArg(OPT_cs_profile_generate);
config->csProfilePath = args.getLastArgValue(OPT_cs_profile_path);
config->pgoWarnMismatch =
Expand Down Expand Up @@ -2103,6 +2137,10 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
}

gatherInputSections();

if (!config->codegenDataGeneratePath.empty())
codegenDataGenerate();

if (config->callGraphProfileSort)
priorityBuilder.extractCallGraphProfile();

Expand Down
1 change: 1 addition & 0 deletions lld/MachO/InputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ constexpr const char objcMethname[] = "__objc_methname";
constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr const char objcProtoList[] = "__objc_protolist";
constexpr const char outlinedHashTree[] = "__llvm_outline";
constexpr const char pageZero[] = "__pagezero";
constexpr const char pointers[] = "__pointers";
constexpr const char rebase[] = "__rebase";
Expand Down
4 changes: 4 additions & 0 deletions lld/MachO/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ def no_objc_category_merging : Flag<["-"], "no_objc_category_merging">,
Group<grp_lld>;
def lto_debug_pass_manager: Flag<["--"], "lto-debug-pass-manager">,
HelpText<"Debug new pass manager">, Group<grp_lld>;
def codegen_data_generate_path : Separate<["--"], "codegen-data-generate-path">, Group<grp_lld>;
def codegen_data_generate_path_eq : Joined<["--"], "codegen-data-generate-path=">,
Alias<!cast<Separate>(codegen_data_generate_path)>, MetaVarName<"<cgdata>">,
HelpText<"Write the CG data to the specified path <cgdata>.">, Group<grp_lld>;
def cs_profile_generate: Flag<["--"], "cs-profile-generate">,
HelpText<"Perform context sensitive PGO instrumentation">, Group<grp_lld>;
def cs_profile_path: Joined<["--"], "cs-profile-path=">,
Expand Down
1 change: 1 addition & 0 deletions lld/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ if (NOT LLD_BUILT_STANDALONE)
llvm-ar
llvm-as
llvm-bcanalyzer
llvm-cgdata
llvm-config
llvm-cvtres
llvm-dis
Expand Down
89 changes: 89 additions & 0 deletions lld/test/MachO/cgdata-generate.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# UNSUPPORTED: system-windows
# REQUIRES: aarch64

# RUN: rm -rf %t; split-file %s %t

# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/ \+/ /g; s/^ *//; s/ *$//; s/ /,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s
# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/ \+/ /g; s/^ *//; s/ *$//; s/ /,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s

# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o
# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-2.s -o %t/merge-2.o
# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/main.s -o %t/main.o

# This checks if the codegen data from the linker is identical to the merged codegen data
# from each object file, which is obtained using the llvm-cgdata tool.
# RUN: %no-arg-lld -dylib -arch arm64 -platform_version ios 14.0 15.0 -o %t/out \
# RUN: %t/merge-1.o %t/merge-2.o %t/main.o --codegen-data-generate-path=%t/out-cgdata
# RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o %t/main.o -o %t/merge-cgdata
# RUN: diff %t/out-cgdata %t/merge-cgdata

# Merge order doesn't matter. `main.o` is dropped due to missing __llvm_outline.
# RUN: llvm-cgdata --merge %t/merge-2.o %t/merge-1.o -o %t/merge-cgdata-shuffle
# RUN: diff %t/out-cgdata %t/merge-cgdata-shuffle

# We can also generate the merged codegen data from the executable that is not dead-stripped.
# RUN: llvm-objdump -h %t/out| FileCheck %s
# CHECK: __llvm_outline
# RUN: llvm-cgdata --merge %t/out -o %t/merge-cgdata-exe
# RUN: diff %t/merge-cgdata-exe %t/merge-cgdata

# Dead-strip will remove __llvm_outline sections from the final executable.
# But the codegen data is still correctly produced from the linker.
# RUN: %no-arg-lld -dylib -arch arm64 -platform_version ios 14.0 15.0 -o %t/out-strip \
# RUN: %t/merge-1.o %t/merge-2.o %t/main.o -dead_strip --codegen-data-generate-path=%t/out-cgdata-strip
# RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o %t/main.o -o %t/merge-cgdata-strip
# RUN: diff %t/out-cgdata-strip %t/merge-cgdata-strip
# RUN: diff %t/out-cgdata-strip %t/merge-cgdata

# Ensure no __llvm_outline section remains in the executable.
# RUN: llvm-objdump -h %t/out-strip | FileCheck %s --check-prefix=STRIP
# STRIP-NOT: __llvm_outline

#--- raw-1.cgtext
:outlined_hash_tree
0:
Hash: 0x0
Terminals: 0
SuccessorIds: [ 1 ]
1:
Hash: 0x1
Terminals: 0
SuccessorIds: [ 2 ]
2:
Hash: 0x2
Terminals: 4
SuccessorIds: [ ]
...

#--- raw-2.cgtext
:outlined_hash_tree
0:
Hash: 0x0
Terminals: 0
SuccessorIds: [ 1 ]
1:
Hash: 0x1
Terminals: 0
SuccessorIds: [ 2 ]
2:
Hash: 0x3
Terminals: 5
SuccessorIds: [ ]
...

#--- merge-template.s
.section __DATA,__llvm_outline
_data:
.byte <RAW_BYTES>

#--- main.s
.globl _main

.text
_main:
ret
1 change: 1 addition & 0 deletions lld/test/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
tool_patterns = [
"llc",
"llvm-as",
"llvm-cgdata",
"llvm-mc",
"llvm-nm",
"llvm-objdump",
Expand Down
Loading