Skip to content

[std]: Use XXHash64 instead of FNV-1a #1581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions std/assembly/map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ export class Map<K,V> {

// buckets referencing their respective first entry, usize[bucketsMask + 1]
private buckets: ArrayBuffer = new ArrayBuffer(INITIAL_CAPACITY * <i32>BUCKET_SIZE);
private bucketsMask: u32 = INITIAL_CAPACITY - 1;
private bucketsMask: u64 = INITIAL_CAPACITY - 1;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit confused by the u32/u64 changes in Map and Set. For instance, u64 isn't really necessary here given that ArrayBuffer capacity is capped, but also may make sense given that hashes are 64-bit now, not sure. But then, further down below, we still have rehash(newBucketsMask: u32) or halfBucketsMask = <u32>(this.bucketsMask >> 1). Now, just using usize as a workaround also doesn't seem quite right, hmm. A strange mix.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried just truncating the u64 result to u32 inside the hashes, but it looks less optimal, with more u64 -> u32 conversions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this makes me a bit in favor of the 32-bit variant for now. Would you agree with a plan like, let's use 32-bit now, and once we have Memory64 support, revisit 64-bit? Iirc that's about what you suggested earlier as well (sorry).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, I'm ok with that. Will sync XXH32


// entries in insertion order, MapEntry<K,V>[entriesCapacity]
private entries: ArrayBuffer = new ArrayBuffer(INITIAL_CAPACITY * <i32>ENTRY_SIZE<K,V>());
Expand All @@ -83,7 +83,7 @@ export class Map<K,V> {
this.entriesCount = 0;
}

private find(key: K, hashCode: u32): MapEntry<K,V> | null {
private find(key: K, hashCode: u64): MapEntry<K,V> | null {
var entry = load<MapEntry<K,V>>( // unmanaged!
changetype<usize>(this.buckets) + <usize>(hashCode & this.bucketsMask) * BUCKET_SIZE
);
Expand Down Expand Up @@ -118,11 +118,11 @@ export class Map<K,V> {
} else {
// check if rehashing is necessary
if (this.entriesOffset == this.entriesCapacity) {
this.rehash(
this.rehash(u32(
this.entriesCount < this.entriesCapacity * FREE_FACTOR_N / FREE_FACTOR_D
? this.bucketsMask // just rehash if 1/4+ entries are empty
: (this.bucketsMask << 1) | 1 // grow capacity to next 2^N
);
));
}
// append new entry
let entries = this.entries;
Expand Down Expand Up @@ -151,7 +151,7 @@ export class Map<K,V> {
entry.taggedNext |= EMPTY;
--this.entriesCount;
// check if rehashing is appropriate
var halfBucketsMask = this.bucketsMask >> 1;
var halfBucketsMask = <u32>(this.bucketsMask >> 1);
if (
halfBucketsMask + 1 >= max<u32>(INITIAL_CAPACITY, this.entriesCount) &&
this.entriesCount < this.entriesCapacity * FREE_FACTOR_N / FREE_FACTOR_D
Expand All @@ -176,8 +176,8 @@ export class Map<K,V> {
let oldEntryKey = oldEntry.key;
newEntry.key = oldEntryKey;
newEntry.value = oldEntry.value;
let newBucketIndex = HASH<K>(oldEntryKey) & newBucketsMask;
let newBucketPtrBase = changetype<usize>(newBuckets) + <usize>newBucketIndex * BUCKET_SIZE;
let newBucketIndex = <usize>(HASH<K>(oldEntryKey) & newBucketsMask);
let newBucketPtrBase = changetype<usize>(newBuckets) + newBucketIndex * BUCKET_SIZE;
newEntry.taggedNext = load<usize>(newBucketPtrBase);
store<usize>(newBucketPtrBase, newPtr);
newPtr += ENTRY_SIZE<K,V>();
Expand Down
10 changes: 5 additions & 5 deletions std/assembly/set.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export class Set<T> {

// buckets referencing their respective first entry, usize[bucketsMask + 1]
private buckets: ArrayBuffer = new ArrayBuffer(INITIAL_CAPACITY * <i32>BUCKET_SIZE);
private bucketsMask: u32 = INITIAL_CAPACITY - 1;
private bucketsMask: u64 = INITIAL_CAPACITY - 1;

// entries in insertion order, SetEntry<K>[entriesCapacity]
private entries: ArrayBuffer = new ArrayBuffer(INITIAL_CAPACITY * <i32>ENTRY_SIZE<T>());
Expand All @@ -80,7 +80,7 @@ export class Set<T> {
this.entriesCount = 0;
}

private find(key: T, hashCode: u32): SetEntry<T> | null {
private find(key: T, hashCode: u64): SetEntry<T> | null {
var entry = load<SetEntry<T>>( // unmanaged!
changetype<usize>(this.buckets) + <usize>(hashCode & this.bucketsMask) * BUCKET_SIZE
);
Expand All @@ -103,11 +103,11 @@ export class Set<T> {
if (!entry) {
// check if rehashing is necessary
if (this.entriesOffset == this.entriesCapacity) {
this.rehash(
this.rehash(u32(
this.entriesCount < this.entriesCapacity * FREE_FACTOR_N / FREE_FACTOR_D
? this.bucketsMask // just rehash if 1/4+ entries are empty
: (this.bucketsMask << 1) | 1 // grow capacity to next 2^N
);
));
}
// append new entry
entry = changetype<SetEntry<T>>(changetype<usize>(this.entries) + <usize>(this.entriesOffset++) * ENTRY_SIZE<T>());
Expand Down Expand Up @@ -136,7 +136,7 @@ export class Set<T> {
entry.taggedNext |= EMPTY;
--this.entriesCount;
// check if rehashing is appropriate
var halfBucketsMask = this.bucketsMask >> 1;
var halfBucketsMask = <u32>(this.bucketsMask >> 1);
if (
halfBucketsMask + 1 >= max<u32>(INITIAL_CAPACITY, this.entriesCount) &&
this.entriesCount < this.entriesCapacity * FREE_FACTOR_N / FREE_FACTOR_D
Expand Down
154 changes: 111 additions & 43 deletions std/assembly/util/hash.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
// @ts-ignore: decorator
@inline
export function HASH<T>(key: T): u32 {
export function HASH<T>(key: T): u64 {
if (isString<T>()) {
return hashStr(changetype<string>(key));
} else if (isReference<T>()) {
Expand All @@ -10,63 +8,133 @@ export function HASH<T>(key: T): u32 {
if (sizeof<T>() == 4) return hash32(reinterpret<u32>(f32(key)));
if (sizeof<T>() == 8) return hash64(reinterpret<u64>(f64(key)));
} else {
if (sizeof<T>() == 1) return hash8 (u32(key));
if (sizeof<T>() == 2) return hash16(u32(key));
if (sizeof<T>() == 4) return hash32(u32(key));
if (sizeof<T>() <= 4) return hash32(u32(key), sizeof<T>());
if (sizeof<T>() == 8) return hash64(u64(key));
}
return unreachable();
}

// FNV-1a 32-bit as a starting point, see: http://isthe.com/chongo/tech/comp/fnv/
// XXHash 64-bit, see: https://cyan4973.github.io/xxHash

// primes
// @ts-ignore: decorator
@inline const FNV_OFFSET: u32 = 2166136261;

@inline const XXH64_P1: u64 = 11400714785074694791;
// @ts-ignore: decorator
@inline const XXH64_P2: u64 = 14029467366897019727;
// @ts-ignore: decorator
@inline const FNV_PRIME: u32 = 16777619;
@inline const XXH64_P3: u64 = 1609587929392839161;
// @ts-ignore: decorator
@inline const XXH64_P4: u64 = 9650029242287828579;
// @ts-ignore: decorator
@inline const XXH64_P5: u64 = 2870177450012600261;
// @ts-ignore: decorator
@inline const XXH64_SEED: u64 = 0;

function hash8(key: u32): u32 {
return (FNV_OFFSET ^ key) * FNV_PRIME;
// Hashes an integer or float key of at most 32 bits into a 64-bit hash code.
//
// `key` is the key's bits widened/reinterpreted to u32 by the caller.
// `len` is the key's size in bytes; it is folded into the initial state, so
// equal bit patterns hashed with different `len` values start from different
// states. It defaults to 4; HASH<T> passes sizeof<T>() for integer keys of
// 4 bytes or fewer.
//
// Returns the mixed 64-bit hash.
// @ts-ignore: decorator
@inline
function hash32(key: u32, len: u64 = 4): u64 {
// initial state: seed + fifth prime + input length
var h: u64 = XXH64_SEED + XXH64_P5 + len;
// fold in the single 32-bit word (widened to 64 bits before multiplying)
h ^= u64(key) * XXH64_P1;
// `rotl` is presumably the rotate-left builtin - confirm against the std lib
h = rotl(h, 23) * XXH64_P2 + XXH64_P3;
// final mixing: alternate xor-with-right-shift and multiply steps
h ^= h >> 33;
h *= XXH64_P2;
h ^= h >> 29;
h *= XXH64_P3;
h ^= h >> 32;
return h;
}

function hash16(key: u32): u32 {
var v = FNV_OFFSET;
v = (v ^ ( key & 0xff)) * FNV_PRIME;
v = (v ^ ( key >> 8 )) * FNV_PRIME;
return v;
// Hashes a 64-bit key into a 64-bit hash code.
//
// `key` is the key's bits as a u64; HASH<T> calls this for 8-byte integers
// and for f64 values reinterpreted as u64.
//
// Returns the mixed 64-bit hash.
// @ts-ignore: decorator
@inline
function hash64(key: u64): u64 {
// initial state: seed + fifth prime + input length (fixed at 8 bytes here)
var h: u64 = XXH64_SEED + XXH64_P5 + 8;
// scramble the single 64-bit word, then xor it into the state
h ^= rotl(key * XXH64_P2, 31) * XXH64_P1;
h = rotl(h, 27) * XXH64_P1 + XXH64_P4;
// final mixing: alternate xor-with-right-shift and multiply steps
h ^= h >> 33;
h *= XXH64_P2;
h ^= h >> 29;
h *= XXH64_P3;
h ^= h >> 32;
return h;
}

function hash32(key: u32): u32 {
var v = FNV_OFFSET;
v = (v ^ ( key & 0xff)) * FNV_PRIME;
v = (v ^ ((key >> 8) & 0xff)) * FNV_PRIME;
v = (v ^ ((key >> 16) & 0xff)) * FNV_PRIME;
v = (v ^ ( key >> 24 )) * FNV_PRIME;
return v;
// Folds one 64-bit input word `key` into the running accumulator `h` and
// returns the new accumulator value: add key * P2, rotate left by 31, then
// multiply by P1. hashStr applies this to each of its four accumulators while
// consuming input in 32-byte steps.
// @ts-ignore: decorator
@inline
function mix1(h: u64, key: u64): u64 {
return rotl(h + key * XXH64_P2, 31) * XXH64_P1;
}

function hash64(key: u64): u32 {
var l = <u32> key;
var h = <u32>(key >>> 32);
var v = FNV_OFFSET;
v = (v ^ ( l & 0xff)) * FNV_PRIME;
v = (v ^ ((l >> 8) & 0xff)) * FNV_PRIME;
v = (v ^ ((l >> 16) & 0xff)) * FNV_PRIME;
v = (v ^ ( l >> 24 )) * FNV_PRIME;
v = (v ^ ( h & 0xff)) * FNV_PRIME;
v = (v ^ ((h >> 8) & 0xff)) * FNV_PRIME;
v = (v ^ ((h >> 16) & 0xff)) * FNV_PRIME;
v = (v ^ ( h >> 24 )) * FNV_PRIME;
return v;
// Merges an accumulator value `s` into the combined hash `h` and returns the
// updated hash: rotate `s` left by 31 and multiply by P1, xor that into `h`,
// then multiply by P1 and add P4. hashStr multiplies each accumulator by P2
// before passing it in as `s`.
// @ts-ignore: decorator
@inline
function mix2(h: u64, s: u64): u64 {
return (h ^ (rotl(s, 31) * XXH64_P1)) * XXH64_P1 + XXH64_P4;
}

function hashStr(key: string): u32 {
var v = FNV_OFFSET;
if (key !== null) {
for (let i: usize = 0, k: usize = key.length << 1; i < k; ++i) {
v = (v ^ <u32>load<u8>(changetype<usize>(key) + i)) * FNV_PRIME;
// @ts-ignore: decorator
@inline
function hashStr(key: string): u64 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a reference for the code in this function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if (key === null) {
return XXH64_SEED;
}

var len = key.length << 1;
var h: u64 = 0;
let i = 0;

if (len >= 32) {
let s1 = XXH64_SEED + XXH64_P1 + XXH64_P2;
let s2 = XXH64_SEED + XXH64_P2;
let s3 = XXH64_SEED;
let s4 = XXH64_SEED - XXH64_P1;
let ln = len;

let n = len - 32;
while (i <= n) {
s1 = mix1(s1, load<u64>(changetype<usize>(key) + i ));
s2 = mix1(s2, load<u64>(changetype<usize>(key) + i, 8));
s3 = mix1(s3, load<u64>(changetype<usize>(key) + i, 16));
s4 = mix1(s4, load<u64>(changetype<usize>(key) + i, 24));
i += 32;
}
h = rotl(s1, 1) + rotl(s2, 7) + rotl(s3, 12) + rotl(s4, 18);

s1 *= XXH64_P2;
s2 *= XXH64_P2;
s3 *= XXH64_P2;
s4 *= XXH64_P2;

h = mix2(h, s1);
h = mix2(h, s2);
h = mix2(h, s3);
h = mix2(h, s4);
h += <u64>ln;
} else {
h = <u64>len + XXH64_SEED + XXH64_P5;
}
return v;

var n = len - 8;
while (i <= n) {
h ^= rotl(load<u64>(changetype<usize>(key) + i) * XXH64_P2, 31) * XXH64_P1;
h = rotl(h, 27) * XXH64_P1 + XXH64_P4;
i += 8;
}

if (i + 4 <= len) {
h ^= <u64>load<u32>(changetype<usize>(key) + i) * XXH64_P1;
h = rotl(h, 23) * XXH64_P2 + XXH64_P3;
i += 4;
}

while (i < len) {
h += <u64>load<u8>(changetype<usize>(key) + i) * XXH64_P5;
h = rotl(h, 11) * XXH64_P1;
i++;
}

h ^= h >> 33;
h *= XXH64_P2;
h ^= h >> 29;
h *= XXH64_P3;
h ^= h >> 32;
return h;
}
Loading