From aaec1db67cb1b15734708e2cde8c17370fe91349 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 15 Aug 2025 09:45:46 +0900 Subject: [PATCH] RuntimeLibcalls: Fix building hash table with duplicate entries We were sizing the table appropriately for the number of LibcallImpls, but many of those have identical names which were pushing up the collision count unnecessarily. This ends up decreasing the table size slightly, and makes it a bit faster. BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and BM_LookupRuntimeLibcallByNameSampleData by ~5%. As a secondary change, align the table size up to the next power of 2. This makes the table larger than before, but improves the sample data benchmark by an additional 5%. --- llvm/test/TableGen/RuntimeLibcallEmitter.td | 4 +- .../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 76 ++++++++----------- 2 files changed, 35 insertions(+), 45 deletions(-) diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td b/llvm/test/TableGen/RuntimeLibcallEmitter.td index dbb5c5e7199d3..07c8abcaab567 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td @@ -176,9 +176,9 @@ def BlahLibrary : SystemRuntimeLibrary RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) { // CHECK: static constexpr uint16_t HashTableNameToEnum[16] = { -// CHECK: 2, // 0x000000705301b8, ___memset +// CHECK: 2, // CHECK: 0, -// CHECK: 6, // 0x0000001417a2af, calloc +// CHECK: 6, // CHECK: 0, // CHECK: }; diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp index 1bbcb759f1813..a1020d5a0db1f 100644 --- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp +++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp @@ -287,13 +287,6 @@ class RuntimeLibcallEmitter { void run(raw_ostream &OS); }; -/// Helper struct for the name hash table. -struct LookupEntry { - StringRef FuncName; - uint64_t Hash = 0; - unsigned TableValue = 0; -}; - } // End anonymous namespace. void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { @@ -339,14 +332,17 @@ static void emitHashFunction(raw_ostream &OS) { /// Return the table size, maximum number of collisions for the set of hashes static std::pair computePerfectHashParameters(ArrayRef Hashes) { - const int SizeOverhead = 10; - const int NumHashes = Hashes.size(); + // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp + const int SizeOverhead = 4; // Index derived from hash -> number of collisions. DenseMap Table; + unsigned NumHashes = Hashes.size(); + for (int MaxCollisions = 1;; ++MaxCollisions) { - for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) { + for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * NumHashes; + N <<= 1) { Table.clear(); bool NeedResize = false; @@ -365,41 +361,29 @@ computePerfectHashParameters(ArrayRef Hashes) { } } -static std::vector +static std::vector constructPerfectHashTable(ArrayRef Keywords, - ArrayRef Hashes, int Size, int Collisions, - StringToOffsetTable &OffsetTable) { - DenseSet Seen; - std::vector Lookup(Size * Collisions); - - for (const RuntimeLibcallImpl &LibCallImpl : Keywords) { - StringRef ImplName = LibCallImpl.getLibcallFuncName(); - - // We do not want to add repeated entries for cases with the same name, only - // an entry for the first, with the name collision enum values immediately - // following. - if (!Seen.insert(ImplName).second) - continue; - - uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1]; + ArrayRef Hashes, + ArrayRef TableValues, int Size, + int Collisions, StringToOffsetTable &OffsetTable) { + std::vector Lookup(Size * Collisions); + for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) { uint64_t Idx = (HashValue % static_cast(Size)) * static_cast(Collisions); bool Found = false; for (int J = 0; J < Collisions; ++J) { - LookupEntry &Entry = Lookup[Idx + J]; - if (Entry.TableValue == 0) { - Entry.FuncName = ImplName; - Entry.TableValue = LibCallImpl.getEnumVal(); - Entry.Hash = HashValue; + unsigned &Entry = Lookup[Idx + J]; + if (Entry == 0) { + Entry = TableValue; Found = true; break; } } if (!Found) - reportFatalInternalError("failure to hash " + ImplName); + reportFatalInternalError("failure to hash"); } return Lookup; @@ -409,15 +393,25 @@ constructPerfectHashTable(ArrayRef Keywords, void RuntimeLibcallEmitter::emitNameMatchHashTable( raw_ostream &OS, StringToOffsetTable &OffsetTable) const { std::vector Hashes(RuntimeLibcallImplDefList.size()); + std::vector TableValues(RuntimeLibcallImplDefList.size()); + DenseSet SeenFuncNames; size_t MaxFuncNameSize = 0; size_t Index = 0; + for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { StringRef ImplName = LibCallImpl.getLibcallFuncName(); - MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size()); - Hashes[Index++] = hash(ImplName); + if (SeenFuncNames.insert(ImplName).second) { + MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size()); + TableValues[Index] = LibCallImpl.getEnumVal(); + Hashes[Index++] = hash(ImplName); + } } + // Trim excess elements from non-unique entries. + Hashes.resize(SeenFuncNames.size()); + TableValues.resize(SeenFuncNames.size()); + LLVM_DEBUG({ for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { StringRef ImplName = LibCallImpl.getLibcallFuncName(); @@ -447,8 +441,9 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable( "#endif\n"; auto [Size, Collisions] = computePerfectHashParameters(Hashes); - std::vector Lookup = constructPerfectHashTable( - RuntimeLibcallImplDefList, Hashes, Size, Collisions, OffsetTable); + std::vector Lookup = + constructPerfectHashTable(RuntimeLibcallImplDefList, Hashes, TableValues, + Size, Collisions, OffsetTable); LLVM_DEBUG(dbgs() << "Runtime libcall perfect hashing parameters: Size = " << Size << ", maximum collisions = " << Collisions << '\n'); @@ -463,13 +458,8 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable( OS << " static constexpr uint16_t HashTableNameToEnum[" << Lookup.size() << "] = {\n"; - for (auto [FuncName, Hash, TableVal] : Lookup) { - OS << " " << TableVal << ','; - if (TableVal != 0) - OS << " // " << format_hex(Hash, 16) << ", " << FuncName; - - OS << '\n'; - } + for (unsigned TableVal : Lookup) + OS << " " << TableVal << ",\n"; OS << " };\n\n";