From 4d6bc105f7ab6755d05c76f3387160bb8eec0ad8 Mon Sep 17 00:00:00 2001 From: Avi Bryant Date: Sat, 18 Feb 2012 20:40:19 -0800 Subject: [PATCH 1/2] hash(s::ByteString, seed::Uint32) It's occasionally useful to have control over the seed being used by the hash algorithm. This adds a hash() method with an extra parameter to allow this. Currently, it's 32bit only - not sure what the best way is to allow 64bit as well. --- j/table.j | 3 +++ src/support/hashing.c | 8 ++++++++ src/support/hashing.h | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/j/table.j b/j/table.j index b5429d38dcb77..a09a94f80f53c 100644 --- a/j/table.j +++ b/j/table.j @@ -129,6 +129,9 @@ else hash(s::ByteString) = ccall(:memhash32, Uint32, (Ptr{Void}, Int), s.data, length(s.data)) end +hash(s::ByteString, seed::Uint32) = ccall(:memhash32_seed, Uint32, (Ptr{Void}, Int, Uint32), s.data, length(s.data), seed) + + # hash table type HashTable{K,V} <: Associative diff --git a/src/support/hashing.c b/src/support/hashing.c index 0f5a74d370d40..4f369585c86c1 100644 --- a/src/support/hashing.c +++ b/src/support/hashing.c @@ -81,3 +81,11 @@ uint32_t memhash32(const char* buf, size_t n) MurmurHash3_x86_32(buf, n, _MHASH_SEED_, &out); return out; } + +uint32_t memhash32_seed(const char* buf, size_t n, uint32_t seed) +{ + uint32_t out; + + MurmurHash3_x86_32(buf, n, seed, &out); + return out; +} diff --git a/src/support/hashing.h b/src/support/hashing.h index 810b45b0fce31..c1b879ccde68a 100644 --- a/src/support/hashing.h +++ b/src/support/hashing.h @@ -12,5 +12,5 @@ DLLEXPORT u_int32_t int64to32hash(u_int64_t key); #endif DLLEXPORT u_int64_t memhash(const char* buf, size_t n); DLLEXPORT u_int32_t memhash32(const char* buf, size_t n); - +DLLEXPORT u_int32_t memhash32_seed(const char* buf, size_t n, u_int32_t seed); #endif From 2d76c20469fcaddad79ba02c6b9033d9c4e63aae Mon Sep 17 00:00:00 2001 From: Avi Bryant Date: Sat, 18 Feb 2012 23:06:32 -0800 Subject: [PATCH 2/2] 64bit version of memhash_seed The memhash_seed functions now choose 32 vs 64bit hashing the same way the fixed-seed ones do. Also added them to julia.expmap. --- j/table.j | 3 ++- src/julia.expmap | 2 ++ src/support/hashing.c | 13 +++++++++++++ src/support/hashing.h | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/j/table.j b/j/table.j index a09a94f80f53c..6e1a04b3ba554 100644 --- a/j/table.j +++ b/j/table.j @@ -125,11 +125,12 @@ hash(x::Any) = uid(x) if WORD_SIZE == 64 hash(s::ByteString) = ccall(:memhash, Uint64, (Ptr{Void}, Int), s.data, length(s.data)) +hash(s::ByteString, seed::Uint32) = ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32), s.data, length(s.data), seed) else hash(s::ByteString) = ccall(:memhash32, Uint32, (Ptr{Void}, Int), s.data, length(s.data)) +hash(s::ByteString, seed::Uint32) = ccall(:memhash32_seed, Uint32, (Ptr{Void}, Int, Uint32), s.data, length(s.data), seed) end -hash(s::ByteString, seed::Uint32) = ccall(:memhash32_seed, Uint32, (Ptr{Void}, Int, Uint32), s.data, length(s.data), seed) # hash table diff --git a/src/julia.expmap b/src/julia.expmap index 44179f1d9e9f4..fabf4054951d3 100644 --- a/src/julia.expmap +++ b/src/julia.expmap @@ -4,7 +4,9 @@ int64hash; int64to32hash; memhash; + memhash_seed; memhash32; + memhash32_seed; jl_hash_symbol; jl_symbol_name; jl_uid; diff --git a/src/support/hashing.c b/src/support/hashing.c index 4f369585c86c1..e5a4d040e8621 100644 --- a/src/support/hashing.c +++ b/src/support/hashing.c @@ -74,6 +74,19 @@ uint64_t memhash(const char* buf, size_t n) return out[1]; } +uint64_t memhash_seed(const char* buf, size_t n, uint32_t seed) +{ + uint64_t out[2]; + + // TODO: expose 128-bit hash +#ifdef __LP64__ + MurmurHash3_x64_128(buf, n, seed, out); +#else + MurmurHash3_x86_128(buf, n, seed, out); +#endif + return out[1]; +} + uint32_t memhash32(const char* buf, size_t n) { uint32_t out; diff --git a/src/support/hashing.h b/src/support/hashing.h index c1b879ccde68a..f267f9dcbf519 100644 --- a/src/support/hashing.h +++ b/src/support/hashing.h @@ -11,6 +11,7 @@ DLLEXPORT u_int32_t int64to32hash(u_int64_t key); #define inthash int32hash #endif DLLEXPORT u_int64_t memhash(const char* buf, size_t n); +DLLEXPORT u_int64_t memhash_seed(const char* buf, size_t n, u_int32_t seed); DLLEXPORT u_int32_t memhash32(const char* buf, size_t n); DLLEXPORT u_int32_t memhash32_seed(const char* buf, size_t n, u_int32_t seed); #endif