Skip to content

Commit 4a0dee7

Browse files
committed
rustdoc-search: better hashing, faster unification
The hash changes are based on some tests with `arti` and various specific queries, and are aimed at reducing the false positive rate.

Sorting the query elements so that generics always come first is instead aimed at reducing the number of Map operations on mgens, on the assumption that, if the bloom filter does produce a false positive, unification will be able to reject the row without having to track a mapping.

- https://hur.st/bloomfilter/?n=3&p=&m=96&k=6

  Different functions have different numbers of inputs, and unification isn't very slow anyway, so figuring out a single ideal number of hash functions is nasty, but 6 keeps the false positive rate low even up to 10 inputs.

- https://web.archive.org/web/20210927123933/https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.2442&rep=rep1&type=pdf

  This paper is the source of the `h1` and `h2` hashes, both of which are derived from `h0`.
1 parent 8f7b52a commit 4a0dee7

File tree

1 file changed

+46
-10
lines changed

1 file changed

+46
-10
lines changed

src/librustdoc/html/static/js/search.js

+46-10
Original file line numberDiff line numberDiff line change
@@ -2266,6 +2266,22 @@ function initSearch(rawSearchIndex) {
22662266
);
22672267
}
22682268
} else if (parsedQuery.foundElems > 0) {
2269+
// Sort input and output so that types with generic parameters or
2270+
// bindings go first and simple atoms go last; within each group, generic
2271+
// type variables (non-positive id) go before named types. Unification eats
2272+
// off the end, and hits a fast path if the last item is a simple atom.
2273+
const sortQ = (a, b) => {
2274+
const ag = a.generics.length === 0 && a.bindings.size === 0;
2275+
const bg = b.generics.length === 0 && b.bindings.size === 0;
2276+
if (ag !== bg) {
2277+
return ag - bg;
2278+
}
2279+
const ai = a.id > 0;
2280+
const bi = b.id > 0;
2281+
return ai - bi;
2282+
};
2283+
parsedQuery.elems.sort(sortQ);
2284+
parsedQuery.returned.sort(sortQ);
22692285
for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) {
22702286
handleArgs(searchIndex[i], i, results_others);
22712287
}
@@ -2831,23 +2847,44 @@ ${item.displayPath}<span class="${type}">${name}</span>\
28312847
* @param {Set<number>} fps - Set of distinct items
28322848
*/
28332849
function buildFunctionTypeFingerprint(type, output, fps) {
2834-
28352850
let input = type.id;
28362851
// All forms of `[]` get collapsed down to one thing in the bloom filter.
28372852
// Differentiating between arrays and slices, if the user asks for it, is
28382853
// still done in the matching algorithm.
28392854
if (input === typeNameIdOfArray || input === typeNameIdOfSlice) {
28402855
input = typeNameIdOfArrayOrSlice;
28412856
}
2857+
// http://burtleburtle.net/bob/hash/integer.html
2858+
// ~~ is toInt32. It's used before adding, so
2859+
// the number stays in safe integer range.
2860+
const hashint1 = k => {
2861+
k = (~~k + 0x7ed55d16) + (k << 12);
2862+
k = (k ^ 0xc761c23c) ^ (k >>> 19);
2863+
k = (~~k + 0x165667b1) + (k << 5);
2864+
k = (~~k + 0xd3a2646c) ^ (k << 9);
2865+
k = (~~k + 0xfd7046c5) + (k << 3);
2866+
return (k ^ 0xb55a4f09) ^ (k >>> 16);
2867+
};
2868+
const hashint2 = k => {
2869+
k = ~k + (k << 15);
2870+
k ^= k >>> 12;
2871+
k += k << 2;
2872+
k ^= k >>> 4;
2873+
k = Math.imul(k, 2057);
2874+
return k ^ (k >> 16);
2875+
};
28422876
if (input !== null) {
2843-
// https://docs.rs/rustc-hash/1.1.0/src/rustc_hash/lib.rs.html#60
2844-
// Rotate is skipped because we're only doing one cycle anyway.
2845-
const h0 = Math.imul(input, 0x9e3779b9);
2846-
const h1 = Math.imul(479001599 ^ input, 0x9e3779b9);
2847-
const h2 = Math.imul(433494437 ^ input, 0x9e3779b9);
2848-
output[0] |= 1 << (h0 % 32);
2849-
output[1] |= 1 << (h1 % 32);
2850-
output[2] |= 1 << (h2 % 32);
2877+
const h0a = hashint1(input);
2878+
const h0b = hashint2(input);
2879+
// Less Hashing, Same Performance: Building a Better Bloom Filter
2880+
// doi=10.1.1.72.2442
2881+
const h1a = ~~(h0a + Math.imul(h0b, 2));
2882+
const h1b = ~~(h0a + Math.imul(h0b, 3));
2883+
const h2a = ~~(h0a + Math.imul(h0b, 4));
2884+
const h2b = ~~(h0a + Math.imul(h0b, 5));
2885+
output[0] |= (1 << (h0a % 32)) | (1 << (h1b % 32));
2886+
output[1] |= (1 << (h1a % 32)) | (1 << (h2b % 32));
2887+
output[2] |= (1 << (h2a % 32)) | (1 << (h0b % 32));
28512888
fps.add(input);
28522889
}
28532890
for (const g of type.generics) {
@@ -2876,7 +2913,6 @@ ${item.displayPath}<span class="${type}">${name}</span>\
28762913
* This function might return 0!
28772914
*/
28782915
function compareTypeFingerprints(fullId, queryFingerprint) {
2879-
28802916
const fh0 = functionTypeFingerprint[fullId * 4];
28812917
const fh1 = functionTypeFingerprint[(fullId * 4) + 1];
28822918
const fh2 = functionTypeFingerprint[(fullId * 4) + 2];

0 commit comments

Comments
 (0)