2
2
// Use of this source code is governed by a BSD-style
3
3
// license that can be found in the LICENSE file.
4
4
5
- // Package hash/ maphash provides hash functions on byte sequences. These
6
- // hash functions are intended to be used to implement hash tables or
5
+ // Package maphash provides hash functions on byte sequences.
6
+ // These hash functions are intended to be used to implement hash tables or
7
7
// other data structures that need to map arbitrary strings or byte
8
- // sequences to a uniform distribution of integers. The hash functions
9
- // are collision-resistant but are not cryptographically secure (use
10
- // one of the hash functions in crypto/* if you need that).
8
+ // sequences to a uniform distribution of integers.
11
9
//
12
- // The produced hashes depend only on the sequence of bytes provided
13
- // to the Hash object, not on the way in which they are provided. For
14
- // example, the calls
15
- // h.AddString("foo")
16
- // h.AddBytes([]byte{'f','o','o'})
17
- // h.AddByte('f'); h.AddByte('o'); h.AddByte('o')
18
- // will all have the same effect.
19
- //
20
- // Two Hash instances in the same process using the same seed
21
- // behave identically.
22
- //
23
- // Two Hash instances with the same seed in different processes are
24
- // not guaranteed to behave identically, even if the processes share
25
- // the same binary.
26
- //
27
- // Hashes are intended to be collision-resistant, even for situations
28
- // where an adversary controls the byte sequences being hashed.
29
- // All bits of the Hash result are close to uniformly and
30
- // independently distributed, so can be safely restricted to a range
31
- // using bit masking, shifting, or modular arithmetic.
10
+ // The hash functions are collision-resistant but not cryptographically secure.
11
+ // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
32
12
package maphash
33
13
34
- import (
35
- "unsafe"
36
- )
14
+ import "unsafe"
37
15
38
- // A Seed controls the behavior of a Hash. Two Hash objects with the
39
- // same seed in the same process will behave identically. Two Hash
40
- // objects with different seeds will very likely behave differently.
16
+ // A Seed is a random value that selects the specific hash function
17
+ // computed by a Hash. If two Hashes use the same Seed, they
18
+ // will compute the same hash values for any given input.
19
+ // If two Hashes use different Seeds, they are very likely to compute
20
+ // distinct hash values for any given input.
21
+ //
22
+ // A Seed must be initialized by calling MakeSeed.
23
+ // The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
24
+ //
25
+ // Each Seed value is local to a single process and cannot be serialized
26
+ // or otherwise recreated in a different process.
41
27
type Seed struct {
42
28
s uint64
43
29
}
44
30
45
- // A Hash object is used to compute the hash of a byte sequence.
31
+ // A Hash computes a seeded hash of a byte sequence.
32
+ //
33
+ // The zero Hash is a valid Hash ready to use.
34
+ // A zero Hash chooses a random seed for itself during
35
+ // the first call to a Reset, Write, Seed, or Sum64 method.
36
+ // For control over the seed, use SetSeed.
37
+ //
38
+ // The computed hash values depend only on the initial seed and
39
+ // the sequence of bytes provided to the Hash object, not on the way
40
+ // in which the bytes are provided. For example, the three sequences
41
+ //
42
+ // h.Write([]byte{'f','o','o'})
43
+ // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
44
+ // h.WriteString("foo")
45
+ //
46
+ // all have the same effect.
47
+ //
48
+ // Hashes are intended to be collision-resistant, even for situations
49
+ // where an adversary controls the byte sequences being hashed.
50
+ //
51
+ // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
52
+ // If multiple goroutines must compute the same seeded hash,
53
+ // each can declare its own Hash and call SetSeed with a common Seed.
46
54
type Hash struct {
47
- seed Seed // initial seed used for this hash
48
- state Seed // current hash of all flushed bytes
49
- buf [64 ]byte // unflushed byte buffer
50
- n int // number of unflushed bytes
55
+ _ [0 ]func () // not comparable
56
+ seed Seed // initial seed used for this hash
57
+ state Seed // current hash of all flushed bytes
58
+ buf [64 ]byte // unflushed byte buffer
59
+ n int // number of unflushed bytes
60
+ }
61
+
62
+ // initSeed seeds the hash if necessary.
63
+ // initSeed is called lazily before any operation that actually uses h.seed/h.state.
64
+ // Note that this does not include Write/WriteByte/WriteString in the case
65
+ // where they only add to h.buf. (If they write too much, they call h.flush,
66
+ // which does call h.initSeed.)
67
+ func (h * Hash ) initSeed () {
68
+ if h .seed .s == 0 {
69
+ h .SetSeed (MakeSeed ())
70
+ }
51
71
}
52
72
53
- // AddByte adds b to the sequence of bytes hashed by h.
54
- func (h * Hash ) AddByte (b byte ) {
73
+ // WriteByte adds b to the sequence of bytes hashed by h.
74
+ // It never fails; the error result is for implementing io.ByteWriter.
75
+ func (h * Hash ) WriteByte (b byte ) error {
55
76
if h .n == len (h .buf ) {
56
77
h .flush ()
57
78
}
58
79
h .buf [h .n ] = b
59
80
h .n ++
81
+ return nil
60
82
}
61
83
62
- // AddBytes adds b to the sequence of bytes hashed by h.
63
- func (h * Hash ) AddBytes (b []byte ) {
84
+ // Write adds b to the sequence of bytes hashed by h.
85
+ // It always writes all of b and never fails; the count and error result are for implementing io.Writer.
86
+ func (h * Hash ) Write (b []byte ) (int , error ) {
87
+ size := len (b )
64
88
for h .n + len (b ) > len (h .buf ) {
65
89
k := copy (h .buf [h .n :], b )
66
90
h .n = len (h .buf )
67
91
b = b [k :]
68
92
h .flush ()
69
93
}
70
94
h .n += copy (h .buf [h .n :], b )
95
+ return size , nil
71
96
}
72
97
73
- // AddString adds the bytes of s to the sequence of bytes hashed by h.
74
- func (h * Hash ) AddString (s string ) {
98
+ // WriteString adds the bytes of s to the sequence of bytes hashed by h.
99
+ // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
100
+ func (h * Hash ) WriteString (s string ) (int , error ) {
101
+ size := len (s )
75
102
for h .n + len (s ) > len (h .buf ) {
76
103
k := copy (h .buf [h .n :], s )
77
104
h .n = len (h .buf )
78
105
s = s [k :]
79
106
h .flush ()
80
107
}
81
108
h .n += copy (h .buf [h .n :], s )
109
+ return size , nil
82
110
}
83
111
84
- // Seed returns the seed value specified in the most recent call to
85
- // SetSeed, or the initial seed if SetSeed was never called.
112
+ // Seed returns h's seed value, first choosing a random seed if h does not have one yet.
86
113
func (h * Hash ) Seed () Seed {
114
+ h .initSeed ()
87
115
return h .seed
88
116
}
89
117
90
- // SetSeed sets the seed used by h. Two Hash objects with the same
91
- // seed in the same process will behave identically. Two Hash objects
92
- // with different seeds will very likely behave differently. Any
93
- // bytes added to h previous to this call will be discarded.
118
+ // SetSeed sets h to use seed, which must have been returned by MakeSeed
119
+ // or by another Hash's Seed method.
120
+ // Two Hash objects with the same seed behave identically.
121
+ // Two Hash objects with different seeds will very likely behave differently.
122
+ // Any bytes added to h before this call will be discarded.
94
123
func (h * Hash ) SetSeed (seed Seed ) {
124
+ if seed .s == 0 {
125
+ panic ("maphash: use of uninitialized Seed" )
126
+ }
95
127
h .seed = seed
96
128
h .state = seed
97
129
h .n = 0
@@ -100,43 +132,46 @@ func (h *Hash) SetSeed(seed Seed) {
100
132
// Reset discards all bytes added to h.
101
133
// (The seed remains the same.)
102
134
func (h * Hash ) Reset () {
135
+ h .initSeed ()
103
136
h .state = h .seed
104
137
h .n = 0
105
138
}
106
139
107
140
// precondition: buffer is full.
108
141
func (h * Hash ) flush () {
109
142
if h .n != len (h .buf ) {
110
- panic ("flush of partially full buffer" )
143
+ panic ("maphash: flush of partially full buffer" )
111
144
}
145
+ h .initSeed ()
112
146
h .state .s = rthash (h .buf [:], h .state .s )
113
147
h .n = 0
114
148
}
115
149
116
- // Hash returns a value which depends on h's seed and the sequence of
117
- // bytes added to h (since the last call to Reset or SetSeed).
118
- func (h * Hash ) Hash () uint64 {
150
+ // Sum64 returns h's current 64-bit value, which depends on
151
+ // h's seed and the sequence of bytes added to h since the
152
+ // last call to Reset or SetSeed.
153
+ //
154
+ // All bits of the Sum64 result are close to uniformly and
155
+ // independently distributed, so it can be safely reduced
156
+ // by using bit masking, shifting, or modular arithmetic.
157
+ func (h * Hash ) Sum64 () uint64 {
158
+ h .initSeed ()
119
159
return rthash (h .buf [:h .n ], h .state .s )
120
160
}
121
161
122
- // MakeSeed returns a Seed initialized using the bits in s.
123
- // Two seeds generated with the same s are guaranteed to be equal.
124
- // Two seeds generated with different s are very likely to be different.
125
- // TODO: disallow this? See Alan's comment in the issue.
126
- func MakeSeed (s uint64 ) Seed {
127
- return Seed {s : s }
128
- }
129
-
130
- // New returns a new Hash object. Different hash objects allocated by
131
- // this function will very likely have different seeds.
132
- func New () * Hash {
133
- s1 := uint64 (runtime_fastrand ())
134
- s2 := uint64 (runtime_fastrand ())
135
- seed := Seed {s : s1 << 32 + s2 }
136
- return & Hash {
137
- seed : seed ,
138
- state : seed ,
162
+ // MakeSeed returns a new random seed.
163
+ func MakeSeed () Seed {
164
+ var s1 , s2 uint64
165
+ for {
166
+ s1 = uint64 (runtime_fastrand ())
167
+ s2 = uint64 (runtime_fastrand ())
168
+ // We use seed 0 to indicate an uninitialized seed/hash,
169
+ // so keep trying until we get a non-zero seed.
170
+ if s1 | s2 != 0 {
171
+ break
172
+ }
139
173
}
174
+ return Seed {s : s1 << 32 + s2 }
140
175
}
141
176
142
177
//go:linkname runtime_fastrand runtime.fastrand
@@ -154,22 +189,17 @@ func rthash(b []byte, seed uint64) uint64 {
154
189
}
155
190
lo := runtime_memhash (unsafe .Pointer (& b [0 ]), uintptr (seed ), uintptr (len (b )))
156
191
hi := runtime_memhash (unsafe .Pointer (& b [0 ]), uintptr (seed >> 32 ), uintptr (len (b )))
157
- // TODO: mix lo/hi? Get 64 bits some other way?
158
192
return uint64 (hi )<< 32 | uint64 (lo )
159
193
}
160
194
161
195
//go:linkname runtime_memhash runtime.memhash
162
196
func runtime_memhash (p unsafe.Pointer , seed , s uintptr ) uintptr
163
197
164
- // Wrapper functions so that a hash/maphash.Hash implements
165
- // the hash.Hash and hash.Hash64 interfaces.
166
-
167
- func (h * Hash ) Write (b []byte ) (int , error ) {
168
- h .AddBytes (b )
169
- return len (b ), nil
170
- }
198
+ // Sum appends the hash's current 64-bit value to b.
199
+ // It exists for implementing hash.Hash.
200
+ // For direct calls, it is more efficient to use Sum64.
171
201
func (h * Hash ) Sum (b []byte ) []byte {
172
- x := h .Hash ()
202
+ x := h .Sum64 ()
173
203
return append (b ,
174
204
byte (x >> 0 ),
175
205
byte (x >> 8 ),
@@ -180,8 +210,9 @@ func (h *Hash) Sum(b []byte) []byte {
180
210
byte (x >> 48 ),
181
211
byte (x >> 56 ))
182
212
}
183
- func (h * Hash ) Sum64 () uint64 {
184
- return h .Hash ()
185
- }
186
- func (h * Hash ) Size () int { return 8 }
213
+
214
+ // Size returns h's hash value size, 8 bytes.
215
+ func (h * Hash ) Size () int { return 8 }
216
+
217
+ // BlockSize returns h's block size, the length in bytes of h's internal buffer.
187
218
func (h * Hash ) BlockSize () int { return len (h .buf ) }
0 commit comments