Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions llvm/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,20 @@ add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED)

# Extract the list of symbols in a random utility as sample data.
set(SYMBOL_TEST_DATA_FILE "sample_symbol_list.txt")
set(SYMBOL_TEST_DATA_SOURCE_BINARY $<TARGET_FILE:llc>)

add_custom_command(OUTPUT ${SYMBOL_TEST_DATA_FILE}
COMMAND $<TARGET_FILE:llvm-nm> --no-demangle --no-sort
Copy link
Contributor

@s-barannikov s-barannikov Aug 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File dependencies don't work well in CMake. I think this should be

Suggested change
COMMAND $<TARGET_FILE:llvm-nm> --no-demangle --no-sort
COMMAND llvm-nm --no-demangle --no-sort

and

  DEPENDS llvm-nm llc

CMake will recognize the names if they are existing targets and add proper dependencies (COMMAND should add target-level dependencies, DEPENDS should add file-level dependencies). Not sure what llc dependency is for here.
It might still need a check that the targets exist, e.g. if (TARGET llvm-nm AND TARGET llc).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the command part still needs to be TARGET_FILE, but yes the depends should be in target terms

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a CMake guru, so I blindly trust the documentation:

If COMMAND specifies an executable target name (created by the add_executable() command), it will automatically be replaced by the location of the executable created at build time if either of the following is true:

  • The target is not being cross-compiled <...>.
  • The target is being cross-compiled and an emulator is provided <...>.

--format=just-symbols
${SYMBOL_TEST_DATA_SOURCE_BINARY} > ${SYMBOL_TEST_DATA_FILE}
DEPENDS "$<TARGET_FILE:llvm-nm>" "$<TARGET_FILE:llc>")

# Custom target that depends on the generated sample symbol list file, giving
# other targets something to depend on.
add_custom_target(generate-runtime-libcalls-sample-symbol-list
DEPENDS ${SYMBOL_TEST_DATA_FILE})
add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED)

# Order the benchmark after the symbol list target, and pass the path of the
# list inside the current binary directory to the benchmark source as the
# SYMBOL_TEST_DATA_FILE preprocessor definition.
add_dependencies(RuntimeLibcallsBench generate-runtime-libcalls-sample-symbol-list)
target_compile_definitions(RuntimeLibcallsBench PRIVATE
-DSYMBOL_TEST_DATA_FILE="${CMAKE_CURRENT_BINARY_DIR}/${SYMBOL_TEST_DATA_FILE}")
116 changes: 116 additions & 0 deletions llvm/benchmarks/RuntimeLibcalls.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/RuntimeLibcalls.h"
#include "benchmark/benchmark.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TargetParser/Triple.h"
#include <random>
#include <string>
using namespace llvm;

static constexpr unsigned MaxFuncNameSize = 53;

static std::vector<StringRef> getLibcallNameStringRefs() {
std::vector<StringRef> Names(RTLIB::NumLibcallImpls);
// Keep the strlens on the StringRef construction out of the benchmark loop.
for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) {
const char *Name = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC);
Names[LC] = StringRef(Name);
}

return Names;
}

/// Build a batch of 1024 pseudo-random byte strings, each between 1 and
/// MaxFuncNameSize bytes long, to use as lookup keys. The engine is
/// default-constructed, so it is not seeded from any external entropy source.
static std::vector<std::string> getRandomFuncNames() {
  constexpr int NumTestFuncs = 1 << 10;

  std::mt19937_64 Rng;
  std::uniform_int_distribution<> LengthDist(1, MaxFuncNameSize);
  // The range starts at 1, so no generated byte is NUL.
  std::uniform_int_distribution<> ByteDist(1, 255);

  std::vector<std::string> Generated(NumTestFuncs);
  for (std::string &Name : Generated) {
    // Draw the length first, then one byte per position.
    const int Length = LengthDist(Rng);
    for (int Pos = 0; Pos < Length; ++Pos)
      Name.push_back(static_cast<char>(ByteDist(Rng)));
  }

  return Generated;
}

/// Load \p InputFile as text through MemoryBuffer::getFileOrSTDIN and return
/// one string per non-blank line, with the host's global symbol prefix
/// character removed from the front of a line when it is present. Reports a
/// fatal usage error if the buffer cannot be obtained.
static std::vector<std::string> readSymbolsFromFile(StringRef InputFile) {
  auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile, /*IsText=*/true);
  if (!BufOrError) {
    reportFatalUsageError("failed to open \'" + Twine(InputFile) +
                          "\': " + BufOrError.getError().message());
  }

  // llvm-nm does not appear to offer a flag for dropping the symbol prefix, so
  // work it out from a throwaway DataLayout that only carries the host
  // triple's mangling component.
  llvm::Triple HostTriple(LLVM_HOST_TRIPLE);
  std::string LayoutDesc = "e";
  LayoutDesc += DataLayout::getManglingComponent(HostTriple);
  const char Prefix = DataLayout(LayoutDesc).getGlobalPrefix();
  const StringRef PrefixRef(&Prefix, 1);

  std::vector<std::string> Symbols;
  line_iterator Line(**BufOrError, /*SkipBlanks=*/true);
  while (!Line.is_at_eof()) {
    StringRef Symbol = *Line;
    Symbol.consume_front(PrefixRef);
    Symbols.push_back(Symbol.str());
    ++Line;
  }
  return Symbols;
}

/// Benchmark name lookup where every query is the name of a libcall
/// implementation taken from getLibcallNameStringRefs().
static void BM_LookupRuntimeLibcallByNameKnownCalls(benchmark::State &State) {
  const std::vector<StringRef> KnownNames = getLibcallNameStringRefs();

  for (auto _ : State) {
    for (StringRef Query : KnownNames) {
      // Keep the result observable so the lookup is not optimized away.
      bool NoMatch =
          RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Query).empty();
      benchmark::DoNotOptimize(NoMatch);
    }
  }
}

/// Benchmark name lookup where the queries are the pseudo-random strings
/// produced by getRandomFuncNames().
static void BM_LookupRuntimeLibcallByNameRandomCalls(benchmark::State &State) {
  const std::vector<std::string> RandomNames = getRandomFuncNames();

  for (auto _ : State) {
    for (const std::string &Query : RandomNames) {
      // Keep the result observable so the lookup is not optimized away.
      bool NoMatch =
          RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Query))
              .empty();
      benchmark::DoNotOptimize(NoMatch);
    }
  }
}

// Benchmark name lookup over the symbol names listed in SYMBOL_TEST_DATA_FILE.
// This isn't fully representative, it doesn't include any anonymous functions.
// A list of this shape can be produced with:
// llvm-nm --no-demangle --no-sort --format=just-symbols sample-binary > sample.txt
static void BM_LookupRuntimeLibcallByNameSampleData(benchmark::State &State) {
  const std::vector<std::string> SampleNames =
      readSymbolsFromFile(SYMBOL_TEST_DATA_FILE);

  for (auto _ : State) {
    for (const std::string &Query : SampleNames) {
      // Keep the result observable so the lookup is not optimized away.
      bool NoMatch =
          RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Query))
              .empty();
      benchmark::DoNotOptimize(NoMatch);
    }
  }
}

BENCHMARK(BM_LookupRuntimeLibcallByNameKnownCalls);
BENCHMARK(BM_LookupRuntimeLibcallByNameRandomCalls);
BENCHMARK(BM_LookupRuntimeLibcallByNameSampleData);

BENCHMARK_MAIN();
44 changes: 36 additions & 8 deletions llvm/include/llvm/IR/RuntimeLibcalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,41 @@ struct RuntimeLibcallsInfo {
return ImplToLibcall[Impl];
}

/// Check if a function name is a recognized runtime call of any kind. This
/// does not consider if this call is available for any current compilation,
/// just that it is a known call somewhere. This returns the set of all
/// LibcallImpls which match the name; multiple implementations with the same
/// name may exist but differ in interpretation based on the target context.
///
/// Generated by tablegen.
///
/// \param Name the function name to look up.
/// \returns a range of RTLIB::LibcallImpl values matching \p Name
/// (presumably empty when nothing matches -- confirm against the generated
/// body).
LLVM_ABI static inline iota_range<RTLIB::LibcallImpl>
lookupLibcallImplName(StringRef Name){
// Inlining the early exit on the string name appears to be worthwhile when
// querying a real set of symbols
// The function body itself comes from the generated RuntimeLibcalls.inc,
// selected by defining GET_LOOKUP_LIBCALL_IMPL_NAME_BODY around the include.
#define GET_LOOKUP_LIBCALL_IMPL_NAME_BODY
#include "llvm/IR/RuntimeLibcalls.inc"
#undef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY
}

/// Check if this is a valid libcall for the current module; otherwise return
/// RTLIB::Unsupported.
LLVM_ABI RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const;
LLVM_ABI RTLIB::LibcallImpl
getSupportedLibcallImpl(StringRef FuncName) const {
  // More than one implementation may share FuncName. Return the entry stored
  // in LibcallImpls for the first candidate whose libcall has one selected.
  for (RTLIB::LibcallImpl Candidate : lookupLibcallImplName(FuncName)) {
    // FIXME: This should not depend on looking up ImplToLibcall, only the
    // list of libcalls for the module.
    const RTLIB::Libcall Call = ImplToLibcall[Candidate];
    const RTLIB::LibcallImpl Selected = LibcallImpls[Call];
    if (Selected == RTLIB::Unsupported)
      continue;
    return Selected;
  }

  // No candidate maps to a libcall with a selected implementation.
  return RTLIB::Unsupported;
}

private:
LLVM_ABI static iota_range<RTLIB::LibcallImpl>
lookupLibcallImplNameImpl(StringRef Name);

/// Stores the implementation choice for each libcall.
RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = {
RTLIB::Unsupported};
Expand All @@ -157,13 +187,11 @@ struct RuntimeLibcallsInfo {
/// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind.
LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls];

/// Check if a function name is a recognized runtime call of any kind. This
/// does not consider if this call is available for any current compilation,
/// just that it is a known call somewhere. This returns the set of all
/// LibcallImpls which match the name; multiple implementations with the same
/// name may exist but differ in interpretation based on the target context.
LLVM_ABI static iterator_range<ArrayRef<uint16_t>::const_iterator>
getRecognizedLibcallImpls(StringRef FuncName);
/// Utility function for tablegenerated lookup function. Return a range of
/// enum values that apply for the function name at \p NameOffsetEntry with
/// the value \p StrOffset.
static inline iota_range<RTLIB::LibcallImpl>
libcallImplNameHit(uint16_t NameOffsetEntry, uint16_t StrOffset);

static bool darwinHasSinCosStret(const Triple &TT) {
if (!TT.isOSDarwin())
Expand Down
59 changes: 18 additions & 41 deletions llvm/lib/IR/RuntimeLibcalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "llvm/IR/RuntimeLibcalls.h"
#include "llvm/ADT/StringTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/xxhash.h"
#include "llvm/TargetParser/ARMTargetParser.h"

#define DEBUG_TYPE "runtime-libcalls-info"
Expand All @@ -18,9 +19,11 @@ using namespace RTLIB;

#define GET_INIT_RUNTIME_LIBCALL_NAMES
#define GET_SET_TARGET_RUNTIME_LIBCALL_SETS
#define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
#include "llvm/IR/RuntimeLibcalls.inc"
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
#undef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME

/// Set default libcall names. If a target wants to opt-out of a libcall it
/// should be placed here.
Expand Down Expand Up @@ -58,49 +61,23 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
}
}

RTLIB::LibcallImpl
RuntimeLibcallsInfo::getSupportedLibcallImpl(StringRef FuncName) const {
const ArrayRef<uint16_t> RuntimeLibcallNameOffsets(
RuntimeLibcallNameOffsetTable);

iterator_range<ArrayRef<uint16_t>::const_iterator> Range =
getRecognizedLibcallImpls(FuncName);

for (auto I = Range.begin(); I != Range.end(); ++I) {
RTLIB::LibcallImpl Impl =
static_cast<RTLIB::LibcallImpl>(I - RuntimeLibcallNameOffsets.begin());

// FIXME: This should not depend on looking up ImplToLibcall, only the list
// of libcalls for the module.
RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]];
if (Recognized != RTLIB::Unsupported)
return Recognized;
LLVM_ATTRIBUTE_ALWAYS_INLINE
iota_range<RTLIB::LibcallImpl>
RuntimeLibcallsInfo::libcallImplNameHit(uint16_t NameOffsetEntry,
uint16_t StrOffset) {
int NumAliases = 1;
for (uint16_t Entry : ArrayRef(RuntimeLibcallNameOffsetTable)
.drop_front(NameOffsetEntry + 1)) {
if (Entry != StrOffset)
break;
++NumAliases;
}

return RTLIB::Unsupported;
}

iterator_range<ArrayRef<uint16_t>::const_iterator>
RuntimeLibcallsInfo::getRecognizedLibcallImpls(StringRef FuncName) {
StringTable::Iterator It = lower_bound(RuntimeLibcallImplNameTable, FuncName);
if (It == RuntimeLibcallImplNameTable.end() || *It != FuncName)
return iterator_range(ArrayRef<uint16_t>());

uint16_t IndexVal = It.offset().value();
const ArrayRef<uint16_t> TableRef(RuntimeLibcallNameOffsetTable);

ArrayRef<uint16_t>::const_iterator E = TableRef.end();
ArrayRef<uint16_t>::const_iterator EntriesBegin =
std::lower_bound(TableRef.begin(), E, IndexVal);
ArrayRef<uint16_t>::const_iterator EntriesEnd = EntriesBegin;

while (EntriesEnd != E && *EntriesEnd == IndexVal)
++EntriesEnd;

assert(EntriesBegin != E &&
"libcall found in name table but not offset table");

return make_range(EntriesBegin, EntriesEnd);
RTLIB::LibcallImpl ImplStart = static_cast<RTLIB::LibcallImpl>(
&RuntimeLibcallNameOffsetTable[NameOffsetEntry] -
&RuntimeLibcallNameOffsetTable[0]);
return enum_seq(ImplStart,
static_cast<RTLIB::LibcallImpl>(ImplStart + NumAliases));
}

bool RuntimeLibcallsInfo::isAAPCS_ABI(const Triple &TT, StringRef ABIName) {
Expand Down
47 changes: 22 additions & 25 deletions llvm/lib/Object/IRSymtab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,18 @@ static cl::opt<bool> DisableBitcodeVersionUpgrade(
"disable-bitcode-version-upgrade", cl::Hidden,
cl::desc("Disable automatic bitcode upgrade for version mismatch"));

static const char *PreservedSymbols[] = {
static constexpr StringLiteral PreservedSymbols[] = {
// These are global variables, so put them here instead of in
// RuntimeLibcalls.td.
// TODO: Are there similar such variables?
"__ssp_canary_word",
"__stack_chk_guard",
};

/// Return true if \p Name equals one of the entries in PreservedSymbols.
static bool isPreservedGlobalVarName(StringRef Name) {
  // Walk the whole table instead of hard-coding indices 0 and 1, so an entry
  // added to PreservedSymbols later is checked here too.
  for (StringRef Symbol : PreservedSymbols)
    if (Symbol == Name)
      return true;
  return false;
}

namespace {

const char *getExpectedProducerName() {
Expand Down Expand Up @@ -81,12 +85,16 @@ struct Builder {
// The StringTableBuilder does not create a copy of any strings added to it,
// so this provides somewhere to store any strings that we create.
Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
BumpPtrAllocator &Alloc)
: Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}
BumpPtrAllocator &Alloc, const Triple &TT)
: Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc), TT(TT),
Libcalls(TT) {}

DenseMap<const Comdat *, int> ComdatMap;
Mangler Mang;
Triple TT;
const Triple &TT;

// FIXME: This shouldn't be here.
RTLIB::RuntimeLibcallsInfo Libcalls;

std::vector<storage::Comdat> Comdats;
std::vector<storage::Module> Mods;
Expand All @@ -98,6 +106,10 @@ struct Builder {

std::vector<storage::Str> DependentLibraries;

// Whether Libcalls reports a supported libcall implementation for Name.
bool isPreservedLibFuncName(StringRef Name) {
  const RTLIB::LibcallImpl Impl = Libcalls.getSupportedLibcallImpl(Name);
  return Impl != RTLIB::Unsupported;
}

void setStr(storage::Str &S, StringRef Value) {
S.Offset = StrtabBuilder.add(Value);
S.Size = Value.size();
Expand Down Expand Up @@ -213,19 +225,6 @@ Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
return P.first->second;
}

static StringSet<> buildPreservedSymbolsSet(const Triple &TT) {
StringSet<> PreservedSymbolSet;
PreservedSymbolSet.insert(std::begin(PreservedSymbols),
std::end(PreservedSymbols));
// FIXME: Do we need to pass in ABI fields from TargetOptions?
RTLIB::RuntimeLibcallsInfo Libcalls(TT);
for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) {
if (Impl != RTLIB::Unsupported)
PreservedSymbolSet.insert(Libcalls.getLibcallImplName(Impl));
}
return PreservedSymbolSet;
}

Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
const SmallPtrSet<GlobalValue *, 4> &Used,
ModuleSymbolTable::Symbol Msym) {
Expand Down Expand Up @@ -279,13 +278,11 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
return Error::success();
}

setStr(Sym.IRName, GV->getName());

static const StringSet<> PreservedSymbolsSet =
buildPreservedSymbolsSet(GV->getParent()->getTargetTriple());
bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());
StringRef GVName = GV->getName();
setStr(Sym.IRName, GVName);

if (Used.count(GV) || IsPreservedSymbol)
if (Used.count(GV) || isPreservedLibFuncName(GVName) ||
isPreservedGlobalVarName(GVName))
Sym.Flags |= 1 << storage::Symbol::FB_used;
if (GV->isThreadLocal())
Sym.Flags |= 1 << storage::Symbol::FB_tls;
Expand Down Expand Up @@ -352,7 +349,6 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
setStr(Hdr.Producer, kExpectedProducerName);
setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple().str());
setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
TT = IRMods[0]->getTargetTriple();

for (auto *M : IRMods)
if (Error Err = addModule(M))
Expand All @@ -378,7 +374,8 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
StringTableBuilder &StrtabBuilder,
BumpPtrAllocator &Alloc) {
return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);
const Triple &TT = Mods[0]->getTargetTriple();
return Builder(Symtab, StrtabBuilder, Alloc, TT).build(Mods);
}

// Upgrade a vector of bitcode modules created by an old version of LLVM by
Expand Down
Loading