|
| 1 | +//===- TrieHashIndexGenerator.h ---------------------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#ifndef LLVM_ADT_TRIEHASHINDEXGENERATOR_H |
| 10 | +#define LLVM_ADT_TRIEHASHINDEXGENERATOR_H |
| 11 | + |
| 12 | +#include "llvm/ADT/ArrayRef.h" |
| 13 | +#include <optional> |
| 14 | + |
| 15 | +namespace llvm { |
| 16 | + |
| 17 | +/// The utility class that helps computing the index of the object inside trie |
| 18 | +/// from its hash. The generator can be configured with the number of bits |
| 19 | +/// used for each level of trie structure with \c NumRootsBits and \c |
| 20 | +/// NumSubtrieBits. |
| 21 | +/// For example, try computing indexes for a 16-bit hash 0x1234 with 8-bit root |
| 22 | +/// and 4-bit sub-trie: |
| 23 | +/// |
| 24 | +/// IndexGenerator IndexGen{8, 4, Hash}; |
| 25 | +/// size_t index1 = IndexGen.next(); // index 18 in root node. |
| 26 | +/// size_t index2 = IndexGen.next(); // index 3 in sub-trie level 1. |
| 27 | +/// size_t index3 = IndexGen.next(); // index 4 in sub-tire level 2. |
| 28 | +/// |
| 29 | +/// This is used by different trie implementation to figure out where to |
| 30 | +/// insert/find the object in the data structure. |
| 31 | +struct TrieHashIndexGenerator { |
| 32 | + size_t NumRootBits; |
| 33 | + size_t NumSubtrieBits; |
| 34 | + ArrayRef<uint8_t> Bytes; |
| 35 | + std::optional<size_t> StartBit = std::nullopt; |
| 36 | + |
| 37 | + // Get the number of bits used to generate current index. |
| 38 | + size_t getNumBits() const { |
| 39 | + assert(StartBit); |
| 40 | + size_t TotalNumBits = Bytes.size() * 8; |
| 41 | + assert(*StartBit <= TotalNumBits); |
| 42 | + return std::min(*StartBit ? NumSubtrieBits : NumRootBits, |
| 43 | + TotalNumBits - *StartBit); |
| 44 | + } |
| 45 | + |
| 46 | + // Get the index of the object in the next level of trie. |
| 47 | + size_t next() { |
| 48 | + if (!StartBit) { |
| 49 | + // Compute index for root when StartBit is not set. |
| 50 | + StartBit = 0; |
| 51 | + return getIndex(Bytes, *StartBit, NumRootBits); |
| 52 | + } |
| 53 | + if (*StartBit < Bytes.size() * 8) { |
| 54 | + // Compute index for sub-trie. |
| 55 | + *StartBit += *StartBit ? NumSubtrieBits : NumRootBits; |
| 56 | + assert((*StartBit - NumRootBits) % NumSubtrieBits == 0); |
| 57 | + return getIndex(Bytes, *StartBit, NumSubtrieBits); |
| 58 | + } |
| 59 | + // All the bits are consumed. |
| 60 | + return end(); |
| 61 | + } |
| 62 | + |
| 63 | + // Provide a hint to speed up the index generation by providing the |
| 64 | + // information of the hash in current level. For example, if the object is |
| 65 | + // known to have \c Index on a level that already consumes first n \c Bits of |
| 66 | + // the hash, it can start index generation from this level by calling \c hint |
| 67 | + // function. |
| 68 | + size_t hint(unsigned Index, unsigned Bit) { |
| 69 | + assert(Bit < Bytes.size() * 8); |
| 70 | + assert(Bit == 0 || (Bit - NumRootBits) % NumSubtrieBits == 0); |
| 71 | + StartBit = Bit; |
| 72 | + return Index; |
| 73 | + } |
| 74 | + |
| 75 | + // Utility function for looking up the index in the trie for an object that |
| 76 | + // has colliding hash bits in the front as the hash of the object that is |
| 77 | + // currently being computed. |
| 78 | + size_t getCollidingBits(ArrayRef<uint8_t> CollidingBits) const { |
| 79 | + assert(StartBit); |
| 80 | + return getIndex(CollidingBits, *StartBit, NumSubtrieBits); |
| 81 | + } |
| 82 | + |
| 83 | + size_t end() const { return SIZE_MAX; } |
| 84 | + |
| 85 | + // Compute the index for the object from its hash, current start bits, and |
| 86 | + // the number of bits used for current level. |
| 87 | + static size_t getIndex(ArrayRef<uint8_t> Bytes, size_t StartBit, |
| 88 | + size_t NumBits) { |
| 89 | + assert(StartBit < Bytes.size() * 8); |
| 90 | + // Drop all the bits before StartBit. |
| 91 | + Bytes = Bytes.drop_front(StartBit / 8u); |
| 92 | + StartBit %= 8u; |
| 93 | + size_t Index = 0; |
| 94 | + // Compute the index using the bits in range [StartBit, StartBit + NumBits), |
| 95 | + // note the range can spread across few `uint8_t` in the array. |
| 96 | + for (uint8_t Byte : Bytes) { |
| 97 | + size_t ByteStart = 0, ByteEnd = 8; |
| 98 | + if (StartBit) { |
| 99 | + ByteStart = StartBit; |
| 100 | + Byte &= (1u << (8 - StartBit)) - 1u; |
| 101 | + StartBit = 0; |
| 102 | + } |
| 103 | + size_t CurrentNumBits = ByteEnd - ByteStart; |
| 104 | + if (CurrentNumBits > NumBits) { |
| 105 | + Byte >>= CurrentNumBits - NumBits; |
| 106 | + CurrentNumBits = NumBits; |
| 107 | + } |
| 108 | + Index <<= CurrentNumBits; |
| 109 | + Index |= Byte & ((1u << CurrentNumBits) - 1u); |
| 110 | + |
| 111 | + assert(NumBits >= CurrentNumBits); |
| 112 | + NumBits -= CurrentNumBits; |
| 113 | + if (!NumBits) |
| 114 | + break; |
| 115 | + } |
| 116 | + return Index; |
| 117 | + } |
| 118 | +}; |
| 119 | + |
| 120 | +} // namespace llvm |
| 121 | + |
| 122 | +#endif // LLVM_ADT_TRIEHASHINDEXGENERATOR_H |
0 commit comments