Skip to content

[std] Improve string hashing #1579

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions std/assembly/map.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/// <reference path="./rt/index.d.ts" />

import { HASH } from "./util/hash";
import { hash } from "./util/hash";
import { E_KEYNOTFOUND } from "./util/error";

// A deterministic hash map based on CloseTable from https://github.com/jorendorff/dht
Expand Down Expand Up @@ -96,19 +96,19 @@ export class Map<K,V> {
}

has(key: K): bool {
return this.find(key, HASH<K>(key)) !== null;
return this.find(key, hash<K>(key)) !== null;
}

@operator("[]")
get(key: K): V {
var entry = this.find(key, HASH<K>(key));
var entry = this.find(key, hash<K>(key));
if (!entry) throw new Error(E_KEYNOTFOUND); // cannot represent `undefined`
return entry.value;
}

@operator("[]=")
set(key: K, value: V): this {
var hashCode = HASH<K>(key);
var hashCode = hash<K>(key);
var entry = this.find(key, hashCode); // unmanaged!
if (entry) {
if (isManaged<V>()) {
Expand Down Expand Up @@ -149,7 +149,7 @@ export class Map<K,V> {
}

delete(key: K): bool {
var entry = this.find(key, HASH<K>(key));
var entry = this.find(key, hash<K>(key));
if (!entry) return false;
if (isManaged<K>()) __release(changetype<usize>(entry.key));
if (isManaged<V>()) __release(changetype<usize>(entry.value));
Expand Down Expand Up @@ -181,7 +181,7 @@ export class Map<K,V> {
let oldEntryKey = oldEntry.key;
newEntry.key = oldEntryKey;
newEntry.value = oldEntry.value;
let newBucketIndex = HASH<K>(oldEntryKey) & newBucketsMask;
let newBucketIndex = hash<K>(oldEntryKey) & newBucketsMask;
let newBucketPtrBase = changetype<usize>(newBuckets) + <usize>newBucketIndex * BUCKET_SIZE;
newEntry.taggedNext = load<usize>(newBucketPtrBase);
store<usize>(newBucketPtrBase, newPtr);
Expand Down
10 changes: 5 additions & 5 deletions std/assembly/set.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/// <reference path="./rt/index.d.ts" />

import { HASH } from "./util/hash";
import { hash } from "./util/hash";

// A deterministic hash set based on CloseTable from https://github.com/jorendorff/dht

Expand Down Expand Up @@ -94,11 +94,11 @@ export class Set<T> {

@operator("[]")
has(key: T): bool {
return this.find(key, HASH<T>(key)) !== null;
return this.find(key, hash<T>(key)) !== null;
}

add(key: T): this {
var hashCode = HASH<T>(key);
var hashCode = hash<T>(key);
var entry = this.find(key, hashCode); // unmanaged!
if (!entry) {
// check if rehashing is necessary
Expand Down Expand Up @@ -130,7 +130,7 @@ export class Set<T> {
}

delete(key: T): bool {
var entry = this.find(key, HASH<T>(key)); // unmanaged!
var entry = this.find(key, hash<T>(key)); // unmanaged!
if (!entry) return false;
if (isManaged<T>()) __release(changetype<usize>(entry.key)); // exact 'key'
entry.taggedNext |= EMPTY;
Expand Down Expand Up @@ -160,7 +160,7 @@ export class Set<T> {
let newEntry = changetype<SetEntry<T>>(newPtr); // unmanaged!
let oldEntryKey = oldEntry.key;
newEntry.key = oldEntryKey;
let newBucketIndex = HASH<T>(oldEntryKey) & newBucketsMask;
let newBucketIndex = hash<T>(oldEntryKey) & newBucketsMask;
let newBucketPtrBase = changetype<usize>(newBuckets) + <usize>newBucketIndex * BUCKET_SIZE;
newEntry.taggedNext = load<usize>(newBucketPtrBase);
store<usize>(newBucketPtrBase, newPtr);
Expand Down
56 changes: 41 additions & 15 deletions std/assembly/util/hash.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
// @ts-ignore: decorator
@inline
export function HASH<T>(key: T): u32 {
export function hash<T>(key: T): u32 {
if (isString<T>()) {
return hashStr(changetype<string>(key));
} else if (isReference<T>()) {
Expand All @@ -26,30 +24,39 @@ export function HASH<T>(key: T): u32 {
// @ts-ignore: decorator
@inline const FNV_PRIME: u32 = 16777619;

function hash8(key: u32): u32 {
return (FNV_OFFSET ^ key) * FNV_PRIME;

// @ts-ignore: decorator
@inline
function hash8(key: u32, seed: u32 = FNV_OFFSET): u32 {
return (seed ^ key) * FNV_PRIME;
}

function hash16(key: u32): u32 {
var v = FNV_OFFSET;
// @ts-ignore: decorator
@inline
function hash16(key: u32, seed: u32 = FNV_OFFSET): u32 {
var v = seed;
v = (v ^ ( key & 0xff)) * FNV_PRIME;
v = (v ^ ( key >> 8 )) * FNV_PRIME;
return v;
}

function hash32(key: u32): u32 {
var v = FNV_OFFSET;
// @ts-ignore: decorator
@inline
function hash32(key: u32, seed: u32 = FNV_OFFSET): u32 {
var v = seed;
v = (v ^ ( key & 0xff)) * FNV_PRIME;
v = (v ^ ((key >> 8) & 0xff)) * FNV_PRIME;
v = (v ^ ((key >> 16) & 0xff)) * FNV_PRIME;
v = (v ^ ( key >> 24 )) * FNV_PRIME;
return v;
}

function hash64(key: u64): u32 {
// @ts-ignore: decorator
@inline
function hash64(key: u64, seed: u32 = FNV_OFFSET): u32 {
var l = <u32> key;
var h = <u32>(key >>> 32);
var v = FNV_OFFSET;
var v = seed;
v = (v ^ ( l & 0xff)) * FNV_PRIME;
v = (v ^ ((l >> 8) & 0xff)) * FNV_PRIME;
v = (v ^ ((l >> 16) & 0xff)) * FNV_PRIME;
Expand All @@ -61,11 +68,30 @@ function hash64(key: u64): u32 {
return v;
}

function hashStr(key: string): u32 {
var v = FNV_OFFSET;
// @ts-ignore: decorator
@inline
function hashStr(key: string, seed: u32 = FNV_OFFSET): u32 {
var v = seed;
if (key !== null) {
for (let i: usize = 0, k: usize = key.length << 1; i < k; ++i) {
v = (v ^ <u32>load<u8>(changetype<usize>(key) + i)) * FNV_PRIME;
let len = key.length << 1;
if (ASC_SHRINK_LEVEL > 1) {
for (let i: usize = 0; i < len; ++i) {
v = (v ^ <u32>load<u8>(changetype<usize>(key) + i)) * FNV_PRIME;
}
} else {
let off: usize = 0;
while (len >= 8) {
v = hash64(load<u64>(changetype<usize>(key) + off), v);
off += 8; len -= 8;
}
if (len >= 4) {
v = hash32(load<u32>(changetype<usize>(key) + off), v);
off += 4; len -= 4;
}
if (len >= 2) {
v = hash16(load<u16>(changetype<usize>(key) + off), v);
}
// "len" is always even here (it is key.length << 1), so no trailing hash8 step is needed
}
}
return v;
Expand Down
172 changes: 157 additions & 15 deletions tests/compiler/std/hash.optimized.wat
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
(data (i32.const 1068) "\02\00\00\00\01\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00a")
(data (i32.const 1100) "\04\00\00\00\01\00\00\00\00\00\00\00\01\00\00\00\04\00\00\00a\00b")
(data (i32.const 1132) "\06\00\00\00\01\00\00\00\00\00\00\00\01\00\00\00\06\00\00\00a\00b\00c")
(data (i32.const 1164) "\08\00\00\00\01\00\00\00\00\00\00\00\01\00\00\00\08\00\00\00a\00b\00c\00d")
(data (i32.const 1196) "\n\00\00\00\01\00\00\00\00\00\00\00\01\00\00\00\n\00\00\00a\00b\00c\00d\00e")
(export "memory" (memory $0))
(start $~start)
(func $~lib/util/hash/hashStr (param $0 i32)
(func $~lib/util/hash/hash<~lib/string/String|null> (param $0 i32)
(local $1 i32)
(local $2 i32)
(local $3 i32)
(local $4 i64)
i32.const -2128831035
local.set $2
local.get $0
Expand All @@ -24,40 +27,179 @@
i32.shr_u
i32.const 1
i32.shl
local.set $3
loop $for-loop|0
local.set $1
loop $while-continue|0
local.get $1
local.get $3
i32.lt_u
i32.const 8
i32.ge_s
if
local.get $2
local.get $0
local.get $1
local.get $3
i32.add
i32.load8_u
i64.load
local.tee $4
i32.wrap_i64
local.tee $2
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 8
i32.shr_u
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 16
i32.shr_u
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 24
i32.shr_u
i32.xor
i32.const 16777619
i32.mul
local.get $4
i64.const 32
i64.shr_u
i32.wrap_i64
local.tee $2
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 8
i32.shr_u
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 16
i32.shr_u
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 24
i32.shr_u
i32.xor
i32.const 16777619
i32.mul
local.set $2
local.get $1
i32.const 1
local.get $3
i32.const 8
i32.add
local.set $3
local.get $1
i32.const 8
i32.sub
local.set $1
br $for-loop|0
br $while-continue|0
end
end
local.get $1
i32.const 4
i32.ge_s
if (result i32)
local.get $2
local.get $0
local.get $3
i32.add
i32.load
local.tee $2
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 8
i32.shr_u
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 16
i32.shr_u
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $2
i32.const 24
i32.shr_u
i32.xor
i32.const 16777619
i32.mul
local.set $2
local.get $3
i32.const 4
i32.add
local.set $3
local.get $1
i32.const 4
i32.sub
else
local.get $1
end
i32.const 2
i32.ge_s
if (result i32)
local.get $2
local.get $0
local.get $3
i32.add
i32.load16_u
local.tee $0
i32.const 255
i32.and
i32.xor
i32.const 16777619
i32.mul
local.get $0
i32.const 8
i32.shr_u
i32.xor
i32.const 16777619
i32.mul
else
local.get $2
end
drop
end
)
(func $~start
i32.const 0
call $~lib/util/hash/hashStr
call $~lib/util/hash/hash<~lib/string/String|null>
i32.const 1056
call $~lib/util/hash/hashStr
call $~lib/util/hash/hash<~lib/string/String|null>
i32.const 1088
call $~lib/util/hash/hashStr
call $~lib/util/hash/hash<~lib/string/String|null>
i32.const 1120
call $~lib/util/hash/hashStr
call $~lib/util/hash/hash<~lib/string/String|null>
i32.const 1152
call $~lib/util/hash/hashStr
call $~lib/util/hash/hash<~lib/string/String|null>
i32.const 1184
call $~lib/util/hash/hash<~lib/string/String|null>
i32.const 1216
call $~lib/util/hash/hash<~lib/string/String|null>
)
)
Loading