diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt index 1078efa55f497..9613678d2e0ac 100644 --- a/llvm/benchmarks/CMakeLists.txt +++ b/llvm/benchmarks/CMakeLists.txt @@ -11,3 +11,20 @@ add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED) +# Extract the list of symbols in a random utility as sample data. +set(SYMBOL_TEST_DATA_FILE "sample_symbol_list.txt") +set(SYMBOL_TEST_DATA_SOURCE_BINARY $) + +add_custom_command(OUTPUT ${SYMBOL_TEST_DATA_FILE} + COMMAND $ --no-demangle --no-sort + --format=just-symbols + ${SYMBOL_TEST_DATA_SOURCE_BINARY} > ${SYMBOL_TEST_DATA_FILE} + DEPENDS "$" "$") + +add_custom_target(generate-runtime-libcalls-sample-symbol-list + DEPENDS ${SYMBOL_TEST_DATA_FILE}) +add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED) + +add_dependencies(RuntimeLibcallsBench generate-runtime-libcalls-sample-symbol-list) +target_compile_definitions(RuntimeLibcallsBench PRIVATE + -DSYMBOL_TEST_DATA_FILE="${CMAKE_CURRENT_BINARY_DIR}/${SYMBOL_TEST_DATA_FILE}") diff --git a/llvm/benchmarks/RuntimeLibcalls.cpp b/llvm/benchmarks/RuntimeLibcalls.cpp new file mode 100644 index 0000000000000..47f68abff1e0d --- /dev/null +++ b/llvm/benchmarks/RuntimeLibcalls.cpp @@ -0,0 +1,116 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/RuntimeLibcalls.h" +#include "benchmark/benchmark.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TargetParser/Triple.h" +#include +#include +using namespace llvm; + +static constexpr unsigned MaxFuncNameSize = 53; + +static std::vector getLibcallNameStringRefs() { + std::vector Names(RTLIB::NumLibcallImpls); + // Keep the strlens on the StringRef construction out of the benchmark loop. + for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) { + const char *Name = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC); + Names[LC] = StringRef(Name); + } + + return Names; +} + +static std::vector getRandomFuncNames() { + std::mt19937_64 Rng; + std::uniform_int_distribution<> StringLengthDistribution(1, MaxFuncNameSize); + std::uniform_int_distribution<> CharDistribution(1, 255); + int NumTestFuncs = 1 << 10; + std::vector TestFuncNames(NumTestFuncs); + + for (std::string &TestFuncName : TestFuncNames) { + for (int I = 0, E = StringLengthDistribution(Rng); I != E; ++I) + TestFuncName += static_cast(CharDistribution(Rng)); + } + + return TestFuncNames; +} + +static std::vector readSymbolsFromFile(StringRef InputFile) { + auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile, /*IsText=*/true); + if (!BufOrError) { + reportFatalUsageError("failed to open \'" + Twine(InputFile) + + "\': " + BufOrError.getError().message()); + } + + // Hackily figure out if there's a prefix on the symbol names - llvm-nm + // appears to not have a flag to skip this. + llvm::Triple HostTriple(LLVM_HOST_TRIPLE); + std::string DummyDatalayout = "e"; + DummyDatalayout += DataLayout::getManglingComponent(HostTriple); + + DataLayout DL(DummyDatalayout); + char GlobalPrefix = DL.getGlobalPrefix(); + + std::vector Lines; + for (line_iterator LineIt(**BufOrError, /*SkipBlanks=*/true); + !LineIt.is_at_eof(); ++LineIt) { + StringRef SymbolName = *LineIt; + SymbolName.consume_front(StringRef(&GlobalPrefix, 1)); + + Lines.push_back(SymbolName.str()); + } + return Lines; +} + +static void BM_LookupRuntimeLibcallByNameKnownCalls(benchmark::State &State) { + std::vector Names = getLibcallNameStringRefs(); + + for (auto _ : State) { + for (StringRef Name : Names) { + benchmark::DoNotOptimize( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Name).empty()); + } + } +} + +static void BM_LookupRuntimeLibcallByNameRandomCalls(benchmark::State &State) { + std::vector TestFuncNames = getRandomFuncNames(); + + for (auto _ : State) { + for (const std::string &Name : TestFuncNames) { + benchmark::DoNotOptimize( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Name)) + .empty()); + } + } +} + +// This isn't fully representative, it doesn't include any anonymous functions. +// nm -n --no-demangle --format=just-symbols sample-binary > sample.txt +static void BM_LookupRuntimeLibcallByNameSampleData(benchmark::State &State) { + std::vector TestFuncNames = + readSymbolsFromFile(SYMBOL_TEST_DATA_FILE); + for (auto _ : State) { + for (const std::string &Name : TestFuncNames) { + benchmark::DoNotOptimize( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Name)) + .empty()); + } + } +} + +BENCHMARK(BM_LookupRuntimeLibcallByNameKnownCalls); +BENCHMARK(BM_LookupRuntimeLibcallByNameRandomCalls); +BENCHMARK(BM_LookupRuntimeLibcallByNameSampleData); + +BENCHMARK_MAIN(); diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 2d1d07c5fd81b..078098eeb7148 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -132,11 +132,41 @@ struct RuntimeLibcallsInfo { return ImplToLibcall[Impl]; } + /// Check if a function name is a recognized runtime call of any kind. This + /// does not consider if this call is available for any current compilation, + /// just that it is a known call somewhere. This returns the set of all + /// LibcallImpls which match the name; multiple implementations with the same + /// name may exist but differ in interpretation based on the target context. + /// + /// Generated by tablegen. + LLVM_ABI static inline iota_range + lookupLibcallImplName(StringRef Name){ + // Inlining the early exit on the string name appears to be worthwhile when + // querying a real set of symbols +#define GET_LOOKUP_LIBCALL_IMPL_NAME_BODY +#include "llvm/IR/RuntimeLibcalls.inc" +#undef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY + } + /// Check if this is valid libcall for the current module, otherwise /// RTLIB::Unsupported. - LLVM_ABI RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const; + LLVM_ABI RTLIB::LibcallImpl + getSupportedLibcallImpl(StringRef FuncName) const { + for (RTLIB::LibcallImpl Impl : lookupLibcallImplName(FuncName)) { + // FIXME: This should not depend on looking up ImplToLibcall, only the + // list of libcalls for the module. + RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]]; + if (Recognized != RTLIB::Unsupported) + return Recognized; + } + + return RTLIB::Unsupported; + } private: + LLVM_ABI static iota_range + lookupLibcallImplNameImpl(StringRef Name); + /// Stores the implementation choice for each each libcall. RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = { RTLIB::Unsupported}; @@ -157,13 +187,11 @@ struct RuntimeLibcallsInfo { /// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind. LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls]; - /// Check if a function name is a recognized runtime call of any kind. This - /// does not consider if this call is available for any current compilation, - /// just that it is a known call somewhere. This returns the set of all - /// LibcallImpls which match the name; multiple implementations with the same - /// name may exist but differ in interpretation based on the target context. - LLVM_ABI static iterator_range::const_iterator> - getRecognizedLibcallImpls(StringRef FuncName); + /// Utility function for tablegenerated lookup function. Return a range of + /// enum values that apply for the function name at \p NameOffsetEntry with + /// the value \p StrOffset. + static inline iota_range + libcallImplNameHit(uint16_t NameOffsetEntry, uint16_t StrOffset); static bool darwinHasSinCosStret(const Triple &TT) { if (!TT.isOSDarwin()) diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index ac845c4998783..88cb192c08781 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -9,6 +9,7 @@ #include "llvm/IR/RuntimeLibcalls.h" #include "llvm/ADT/StringTable.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/xxhash.h" #include "llvm/TargetParser/ARMTargetParser.h" #define DEBUG_TYPE "runtime-libcalls-info" @@ -18,9 +19,11 @@ using namespace RTLIB; #define GET_INIT_RUNTIME_LIBCALL_NAMES #define GET_SET_TARGET_RUNTIME_LIBCALL_SETS +#define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME #include "llvm/IR/RuntimeLibcalls.inc" #undef GET_INIT_RUNTIME_LIBCALL_NAMES #undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS +#undef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME /// Set default libcall names. If a target wants to opt-out of a libcall it /// should be placed here. @@ -58,49 +61,23 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, } } -RTLIB::LibcallImpl -RuntimeLibcallsInfo::getSupportedLibcallImpl(StringRef FuncName) const { - const ArrayRef RuntimeLibcallNameOffsets( - RuntimeLibcallNameOffsetTable); - - iterator_range::const_iterator> Range = - getRecognizedLibcallImpls(FuncName); - - for (auto I = Range.begin(); I != Range.end(); ++I) { - RTLIB::LibcallImpl Impl = - static_cast(I - RuntimeLibcallNameOffsets.begin()); - - // FIXME: This should not depend on looking up ImplToLibcall, only the list - // of libcalls for the module. - RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]]; - if (Recognized != RTLIB::Unsupported) - return Recognized; +LLVM_ATTRIBUTE_ALWAYS_INLINE +iota_range +RuntimeLibcallsInfo::libcallImplNameHit(uint16_t NameOffsetEntry, + uint16_t StrOffset) { + int NumAliases = 1; + for (uint16_t Entry : ArrayRef(RuntimeLibcallNameOffsetTable) + .drop_front(NameOffsetEntry + 1)) { + if (Entry != StrOffset) + break; + ++NumAliases; } - return RTLIB::Unsupported; -} - -iterator_range::const_iterator> -RuntimeLibcallsInfo::getRecognizedLibcallImpls(StringRef FuncName) { - StringTable::Iterator It = lower_bound(RuntimeLibcallImplNameTable, FuncName); - if (It == RuntimeLibcallImplNameTable.end() || *It != FuncName) - return iterator_range(ArrayRef()); - - uint16_t IndexVal = It.offset().value(); - const ArrayRef TableRef(RuntimeLibcallNameOffsetTable); - - ArrayRef::const_iterator E = TableRef.end(); - ArrayRef::const_iterator EntriesBegin = - std::lower_bound(TableRef.begin(), E, IndexVal); - ArrayRef::const_iterator EntriesEnd = EntriesBegin; - - while (EntriesEnd != E && *EntriesEnd == IndexVal) - ++EntriesEnd; - - assert(EntriesBegin != E && - "libcall found in name table but not offset table"); - - return make_range(EntriesBegin, EntriesEnd); + RTLIB::LibcallImpl ImplStart = static_cast( + &RuntimeLibcallNameOffsetTable[NameOffsetEntry] - + &RuntimeLibcallNameOffsetTable[0]); + return enum_seq(ImplStart, + static_cast(ImplStart + NumAliases)); } bool RuntimeLibcallsInfo::isAAPCS_ABI(const Triple &TT, StringRef ABIName) { diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp index 0f194953787e6..0043f02107fb8 100644 --- a/llvm/lib/Object/IRSymtab.cpp +++ b/llvm/lib/Object/IRSymtab.cpp @@ -46,7 +46,7 @@ static cl::opt DisableBitcodeVersionUpgrade( "disable-bitcode-version-upgrade", cl::Hidden, cl::desc("Disable automatic bitcode upgrade for version mismatch")); -static const char *PreservedSymbols[] = { +static constexpr StringLiteral PreservedSymbols[] = { // There are global variables, so put it here instead of in // RuntimeLibcalls.td. // TODO: Are there similar such variables? @@ -54,6 +54,10 @@ static const char *PreservedSymbols[] = { "__stack_chk_guard", }; +static bool isPreservedGlobalVarName(StringRef Name) { + return PreservedSymbols[0] == Name || PreservedSymbols[1] == Name; +} + namespace { const char *getExpectedProducerName() { @@ -81,12 +85,16 @@ struct Builder { // The StringTableBuilder does not create a copy of any strings added to it, // so this provides somewhere to store any strings that we create. Builder(SmallVector &Symtab, StringTableBuilder &StrtabBuilder, - BumpPtrAllocator &Alloc) - : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} + BumpPtrAllocator &Alloc, const Triple &TT) + : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc), TT(TT), + Libcalls(TT) {} DenseMap ComdatMap; Mangler Mang; - Triple TT; + const Triple &TT; + + // FIXME: This shouldn't be here. + RTLIB::RuntimeLibcallsInfo Libcalls; std::vector Comdats; std::vector Mods; @@ -98,6 +106,10 @@ struct Builder { std::vector DependentLibraries; + bool isPreservedLibFuncName(StringRef Name) { + return Libcalls.getSupportedLibcallImpl(Name) != RTLIB::Unsupported; + } + void setStr(storage::Str &S, StringRef Value) { S.Offset = StrtabBuilder.add(Value); S.Size = Value.size(); @@ -213,19 +225,6 @@ Expected Builder::getComdatIndex(const Comdat *C, const Module *M) { return P.first->second; } -static StringSet<> buildPreservedSymbolsSet(const Triple &TT) { - StringSet<> PreservedSymbolSet; - PreservedSymbolSet.insert(std::begin(PreservedSymbols), - std::end(PreservedSymbols)); - // FIXME: Do we need to pass in ABI fields from TargetOptions? - RTLIB::RuntimeLibcallsInfo Libcalls(TT); - for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) { - if (Impl != RTLIB::Unsupported) - PreservedSymbolSet.insert(Libcalls.getLibcallImplName(Impl)); - } - return PreservedSymbolSet; -} - Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, const SmallPtrSet &Used, ModuleSymbolTable::Symbol Msym) { @@ -279,13 +278,11 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, return Error::success(); } - setStr(Sym.IRName, GV->getName()); - - static const StringSet<> PreservedSymbolsSet = - buildPreservedSymbolsSet(GV->getParent()->getTargetTriple()); - bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName()); + StringRef GVName = GV->getName(); + setStr(Sym.IRName, GVName); - if (Used.count(GV) || IsPreservedSymbol) + if (Used.count(GV) || isPreservedLibFuncName(GVName) || + isPreservedGlobalVarName(GVName)) Sym.Flags |= 1 << storage::Symbol::FB_used; if (GV->isThreadLocal()) Sym.Flags |= 1 << storage::Symbol::FB_tls; @@ -352,7 +349,6 @@ Error Builder::build(ArrayRef IRMods) { setStr(Hdr.Producer, kExpectedProducerName); setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple().str()); setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); - TT = IRMods[0]->getTargetTriple(); for (auto *M : IRMods) if (Error Err = addModule(M)) @@ -378,7 +374,8 @@ Error Builder::build(ArrayRef IRMods) { Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc) { - return Builder(Symtab, StrtabBuilder, Alloc).build(Mods); + const Triple &TT = Mods[0]->getTargetTriple(); + return Builder(Symtab, StrtabBuilder, Alloc, TT).build(Mods); } // Upgrade a vector of bitcode modules created by an old version of LLVM by diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td b/llvm/test/TableGen/RuntimeLibcallEmitter.td index a2d946f3aa84f..54ca3f97e2d4b 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td @@ -149,6 +149,32 @@ def BlahLibrary : SystemRuntimeLibrary 9) +// CHECK-NEXT: return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); +// CHECK-NEXT: return lookupLibcallImplNameImpl(Name); +// CHECK-NEXT: #endif + +// CHECK: #ifdef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME +// CHECK-NEXT: static inline uint64_t hash(StringRef Str) { +// CHECK-NEXT: return static_cast(xxh3_64bits(Str)); +// CHECK-NEXT: } + +// CHECK: iota_range RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) { +// CHECK: static constexpr std::pair HashTableNameToEnum[16] = { +// CHECK: {2, 9}, // 0x000000705301b8, ___memset +// CHECK: {0, 0}, +// CHECK: {6, 6}, // 0x0000001417a2af, calloc +// CHECK: {0, 0}, +// CHECK: }; + +// CHECK: unsigned Idx = (hash(Name) % 8) * 2; +// CHECK: for (int I = 0; I != 2; ++I) { +// CHECK: return libcallImplNameHit(Entry, StrOffset); + +// CHECK: return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); +// CHECK-NEXT: } // CHECK: void llvm::RTLIB::RuntimeLibcallsInfo::setTargetRuntimeLibcallSets(const llvm::Triple &TT, FloatABI::ABIType FloatABI, EABI EABIVersion, StringRef ABIName) { // CHECK-NEXT: struct LibcallImplPair { diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index b66eae93f9339..8b7bd3997ea27 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -43,6 +43,7 @@ add_llvm_unittest(IRTests PatternMatch.cpp ShuffleVectorInstTest.cpp StructuralHashTest.cpp + RuntimeLibcallsTest.cpp TimePassesTest.cpp TypesTest.cpp UseTest.cpp diff --git a/llvm/unittests/IR/RuntimeLibcallsTest.cpp b/llvm/unittests/IR/RuntimeLibcallsTest.cpp new file mode 100644 index 0000000000000..94ed56e92bd55 --- /dev/null +++ b/llvm/unittests/IR/RuntimeLibcallsTest.cpp @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/RuntimeLibcalls.h" +#include "llvm/ADT/STLExtras.h" +#include "gtest/gtest.h" +using namespace llvm; + +namespace { + +TEST(RuntimeLibcallsTest, LibcallImplByName) { + EXPECT_TRUE(RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("").empty()); + EXPECT_TRUE( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("unknown").empty()); + EXPECT_TRUE( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("Unsupported").empty()); + EXPECT_TRUE( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("unsupported").empty()); + + for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) { + const char *Name = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC); + EXPECT_TRUE(is_contained( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Name), LC)); + } + + // Test first libcall name + EXPECT_EQ( + RTLIB::arm64ec__Unwind_Resume, + *RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("#_Unwind_Resume") + .begin()); + // Test longest libcall names + EXPECT_EQ(RTLIB::__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes, + *RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName( + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes") + .begin()); + + { + auto SquirtleSquad = + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("sqrtl"); + ASSERT_EQ(size(SquirtleSquad), 3); + auto I = SquirtleSquad.begin(); + EXPECT_EQ(*I++, RTLIB::sqrt_f128); + EXPECT_EQ(*I++, RTLIB::sqrt_f80); + EXPECT_EQ(*I++, RTLIB::sqrt_ppcf128); + } + + // Last libcall + { + auto Truncs = RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("truncl"); + ASSERT_EQ(size(Truncs), 3); + auto I = Truncs.begin(); + EXPECT_EQ(*I++, RTLIB::trunc_f128); + EXPECT_EQ(*I++, RTLIB::trunc_f80); + EXPECT_EQ(*I++, RTLIB::trunc_ppcf128); + } +} + +} // namespace diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp index 0fc230c4714f0..775cef22db0b6 100644 --- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp +++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp @@ -6,10 +6,15 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "runtime-libcall-emitter" + +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/xxhash.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/SetTheory.h" @@ -215,6 +220,9 @@ class RuntimeLibcallEmitter { private: void emitGetRuntimeLibcallEnum(raw_ostream &OS) const; + void emitNameMatchHashTable(raw_ostream &OS, + StringToOffsetTable &OffsetTable) const; + void emitGetInitRuntimeLibcallNames(raw_ostream &OS) const; void emitSystemRuntimeLibrarySetCalls(raw_ostream &OS) const; @@ -255,12 +263,9 @@ class RuntimeLibcallEmitter { RuntimeLibcallImplDefList.emplace_back(LibCallImplDef, Def2RuntimeLibcall, LibCallImplEnumVal++); - RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); - + const RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); Def2RuntimeLibcallImpl[LibCallImplDef] = &LibCallImpl; - // const RuntimeLibcallImpl &LibCallImpl = - // RuntimeLibcallImplDefList.back(); if (LibCallImpl.isDefault()) { const RuntimeLibcall *Provides = LibCallImpl.getProvides(); if (!Provides) @@ -282,6 +287,13 @@ class RuntimeLibcallEmitter { void run(raw_ostream &OS); }; +/// Helper struct for the name hash table. +struct LookupEntry { + StringRef FuncName; + uint64_t Hash = 0; + unsigned TableValue = 0; +}; + } // End anonymous namespace. void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { @@ -295,8 +307,6 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { OS << " " << Name << " = " << LibCall.getEnumVal() << ",\n"; } - // TODO: Emit libcall names as string offset table. - OS << " UNKNOWN_LIBCALL = " << RuntimeLibcallDefList.size() << "\n};\n\n" "enum LibcallImpl : unsigned short {\n" @@ -315,8 +325,181 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { "#endif\n\n"; } +// StringMap uses xxh3_64bits, truncated to uint32_t. +static uint64_t hash(StringRef Str) { + return static_cast(xxh3_64bits(Str)); +} + +static void emitHashFunction(raw_ostream &OS) { + OS << "static inline uint64_t hash(StringRef Str) {\n" + " return static_cast(xxh3_64bits(Str));\n" + "}\n\n"; +} + +/// Return the table size, maximum number of collisions for the set of hashes +static std::pair +computePerfectHashParameters(ArrayRef Hashes) { + const int SizeOverhead = 10; + const int NumHashes = Hashes.size(); + + // Index derived from hash -> number of collisions. + DenseMap Table; + + for (int MaxCollisions = 1;; ++MaxCollisions) { + for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) { + Table.clear(); + + bool NeedResize = false; + for (uint64_t H : Hashes) { + uint64_t Idx = H % static_cast(N); + if (++Table[Idx] > MaxCollisions) { + // Need to resize the final table if we increased the collision count. + NeedResize = true; + break; + } + } + + if (!NeedResize) + return {N, MaxCollisions}; + } + } +} + +static std::vector +constructPerfectHashTable(ArrayRef Keywords, + ArrayRef Hashes, int Size, int Collisions, + StringToOffsetTable &OffsetTable) { + DenseSet Seen; + std::vector Lookup(Size * Collisions); + + for (const RuntimeLibcallImpl &LibCallImpl : Keywords) { + StringRef ImplName = LibCallImpl.getLibcallFuncName(); + + // We do not want to add repeated entries for cases with the same name, only + // an entry for the first, with the name collision enum values immediately + // following. + if (!Seen.insert(ImplName).second) + continue; + + uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1]; + + uint64_t Idx = (HashValue % static_cast(Size)) * + static_cast(Collisions); + + bool Found = false; + for (int J = 0; J < Collisions; ++J) { + LookupEntry &Entry = Lookup[Idx + J]; + if (Entry.TableValue == 0) { + Entry.FuncName = ImplName; + Entry.TableValue = LibCallImpl.getEnumVal(); + Entry.Hash = HashValue; + Found = true; + break; + } + } + + if (!Found) + reportFatalInternalError("failure to hash " + ImplName); + } + + return Lookup; +} + +/// Generate hash table based lookup by name. +void RuntimeLibcallEmitter::emitNameMatchHashTable( + raw_ostream &OS, StringToOffsetTable &OffsetTable) const { + std::vector Hashes(RuntimeLibcallImplDefList.size()); + + size_t MaxFuncNameSize = 0; + size_t Index = 0; + for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { + StringRef ImplName = LibCallImpl.getLibcallFuncName(); + MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size()); + Hashes[Index++] = hash(ImplName); + } + + LLVM_DEBUG({ + for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { + StringRef ImplName = LibCallImpl.getLibcallFuncName(); + if (ImplName.size() == MaxFuncNameSize) { + dbgs() << "Maximum runtime libcall name size: " << ImplName << '(' + << MaxFuncNameSize << ")\n"; + } + } + }); + + // Early exiting on the symbol name provides a significant speedup in the miss + // case on the set of symbols in a clang binary. Emit this as an inlinable + // precondition in the header. + // + // The empty check is also used to get sensible behavior on anonymous + // functions. + // + // TODO: It may make more sense to split the search by string size more. There + // are a few outliers, most call names are small. + OS << "#ifdef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY\n" + " size_t Size = Name.size();\n" + " if (Size == 0 || Size > " + << MaxFuncNameSize + << ")\n" + " return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported);\n" + " return lookupLibcallImplNameImpl(Name);\n" + "#endif\n"; + + auto [Size, Collisions] = computePerfectHashParameters(Hashes); + std::vector Lookup = constructPerfectHashTable( + RuntimeLibcallImplDefList, Hashes, Size, Collisions, OffsetTable); + + LLVM_DEBUG(dbgs() << "Runtime libcall perfect hashing parameters: Size = " + << Size << ", maximum collisions = " << Collisions << '\n'); + + OS << "#ifdef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME\n"; + emitHashFunction(OS); + + OS << "iota_range RTLIB::RuntimeLibcallsInfo::" + "lookupLibcallImplNameImpl(StringRef Name) {\n"; + + // Emit pair of RTLIB::LibcallImpl, size of the string name. It's important to + // avoid strlen on the string table entries. + OS << " static constexpr std::pair HashTableNameToEnum[" + << Lookup.size() << "] = {\n"; + + for (auto [FuncName, Hash, TableVal] : Lookup) { + OS << " {" << TableVal << ", " << FuncName.size() << "},"; + + if (TableVal != 0) { + OS << " // " << format_hex(Hash, 16) << ", " << FuncName; + } + + OS << '\n'; + } + + OS << " };\n\n"; + + OS << " unsigned Idx = (hash(Name) % " << Size << ") * " << Collisions + << ";\n\n" + " for (int I = 0; I != " + << Collisions << R"(; ++I) { + auto [Entry, StringSize] = HashTableNameToEnum[Idx + I]; + const uint16_t StrOffset = RuntimeLibcallNameOffsetTable[Entry]; + StringRef Str( + &RTLIB::RuntimeLibcallsInfo::RuntimeLibcallImplNameTableStorage[StrOffset], + StringSize); + if (Str == Name) + return libcallImplNameHit(Entry, StrOffset); + } + + return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); +} +)"; + + OS << "#endif\n\n"; +} + void RuntimeLibcallEmitter::emitGetInitRuntimeLibcallNames( raw_ostream &OS) const { + OS << "#ifdef GET_INIT_RUNTIME_LIBCALL_NAMES\n"; + // Emit the implementation names StringToOffsetTable Table(/*AppendZero=*/true, "RTLIB::RuntimeLibcallsInfo::"); @@ -351,6 +534,10 @@ const uint16_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameOffsetTable[] = { OS << '\n'; } OS << "};\n\n"; + + OS << "#endif\n\n"; + + emitNameMatchHashTable(OS, Table); } void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( @@ -531,9 +718,7 @@ void RuntimeLibcallEmitter::run(raw_ostream &OS) { emitSourceFileHeader("Runtime LibCalls Source Fragment", OS, Records); emitGetRuntimeLibcallEnum(OS); - OS << "#ifdef GET_INIT_RUNTIME_LIBCALL_NAMES\n"; emitGetInitRuntimeLibcallNames(OS); - OS << "#endif\n\n"; OS << "#ifdef GET_SET_TARGET_RUNTIME_LIBCALL_SETS\n"; emitSystemRuntimeLibrarySetCalls(OS);