@@ -8,6 +8,13 @@ use std::ptr;
8
8
#[ cfg( test) ]
9
9
mod tests;
10
10
11
+ /// When hashing something that ends up affecting properties like symbol names,
12
+ /// we want these symbol names to be calculated independently of other factors
13
+ /// like what architecture you're compiling *from*.
14
+ ///
15
+ /// To that end, we always convert integers to little-endian format or handle
16
+ /// them in an endian-independent way, and extend the architecture-dependent
17
+ /// `isize` and `usize` types to 64 bits if needed before hashing.
11
18
#[ derive( Debug , Clone ) ]
12
19
pub struct SipHasher128 {
13
20
k0 : u64 ,
@@ -125,15 +132,17 @@ impl SipHasher128 {
125
132
126
133
// A specialized write function for values with size <= 8.
127
134
//
128
- // The hashing of multi-byte integers depends on endianness. E.g.:
129
- // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
130
- // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
135
+ // The input must be zero-extended to 64 bits by the caller. The extension
136
+ // isn't hashed, but the implementation requires it for correctness.
137
+ //
138
+ // This function, given the same integer type and value, has the same effect
139
+ // on both little- and big-endian hardware. It operates on values without
140
+ // depending on their sequence in memory, so it is independent of endianness.
131
141
//
132
- // This function does the right thing for little-endian hardware. On
133
- // big-endian hardware `x` must be byte-swapped first to give the right
134
- // behaviour. After any byte-swapping, the input must be zero-extended to
135
- // 64-bits. The caller is responsible for the byte-swapping and
136
- // zero-extension.
142
+ // The equivalent write() call *does* need the value's bytes converted to
143
+ // little-endian (without zero-extension) for equivalent behavior on little-
144
+ // and big-endian hardware, as write() *does* operate on byte sequences.
145
+ // I.e. write_u32(0xDDCCBBAA) == write(&0xDDCCBBAA_u32.to_le_bytes()).
137
146
#[ inline]
138
147
fn short_write < T > ( & mut self , _x : T , x : u64 ) {
139
148
let size = mem:: size_of :: < T > ( ) ;
@@ -167,12 +176,9 @@ impl SipHasher128 {
167
176
// left-shift it five bytes, giving 0xHHGG_FF00_0000_0000. We then
168
177
// bitwise-OR that value into `self.tail`, resulting in
169
178
// 0xHHGG_FFEE_DDCC_BBAA. `self.tail` is now full, and we can use it
170
- // to update `self.state`. (As mentioned above, this assumes a
171
- // little-endian machine; on a big-endian machine we would have
172
- // byte-swapped 0xIIHH_GGFF in the caller, giving 0xFFGG_HHII, and we
173
- // would then end up bitwise-ORing 0xGGHH_II00_0000_0000 into
174
- // `self.tail`).
175
- //
179
+ // to update `self.state`. The analysis is the same whether we are on
180
+ // a little-endian or big-endian machine, as the bitwise operations
181
+ // are endian-independent.
176
182
self . tail |= x << ( 8 * self . ntail ) ;
177
183
if size < needed {
178
184
self . ntail += size;
@@ -186,8 +192,7 @@ impl SipHasher128 {
186
192
187
193
// Continuing scenario 2: we have one byte left over from the input. We
188
194
// set `self.ntail` to 1 and `self.tail` to `0x0000_0000_IIHH_GGFF >>
189
- // 8*3`, which is 0x0000_0000_0000_00II. (Or on a big-endian machine
190
- // the prior byte-swapping would leave us with 0x0000_0000_0000_00FF.)
195
+ // 8*3`, which is 0x0000_0000_0000_00II.
191
196
//
192
197
// The `if` is needed to avoid shifting by 64 bits, which Rust
193
198
// complains about.
@@ -222,22 +227,30 @@ impl Hasher for SipHasher128 {
222
227
223
228
#[ inline]
224
229
fn write_u16 ( & mut self , i : u16 ) {
225
- self . short_write ( i, i. to_le ( ) as u64 ) ;
230
+ self . short_write ( i, i as u64 ) ;
226
231
}
227
232
228
233
#[ inline]
229
234
fn write_u32 ( & mut self , i : u32 ) {
230
- self . short_write ( i, i. to_le ( ) as u64 ) ;
235
+ self . short_write ( i, i as u64 ) ;
231
236
}
232
237
233
238
#[ inline]
234
239
fn write_u64 ( & mut self , i : u64 ) {
235
- self . short_write ( i, i. to_le ( ) as u64 ) ;
240
+ self . short_write ( i, i as u64 ) ;
241
+ }
242
+
243
+ #[ inline]
244
+ fn write_u128 ( & mut self , i : u128 ) {
245
+ self . write ( & i. to_le_bytes ( ) ) ;
236
246
}
237
247
238
248
#[ inline]
239
249
fn write_usize ( & mut self , i : usize ) {
240
- self . short_write ( i, i. to_le ( ) as u64 ) ;
250
+ // Always treat usize as u64 so we get the same results on 32- and 64-bit
251
+ // platforms. This is important for symbol hashes when cross compiling,
252
+ // for example.
253
+ self . write_u64 ( i as u64 ) ;
241
254
}
242
255
243
256
#[ inline]
@@ -247,22 +260,31 @@ impl Hasher for SipHasher128 {
247
260
248
261
#[ inline]
249
262
fn write_i16 ( & mut self , i : i16 ) {
250
- self . short_write ( i, ( i as u16 ) . to_le ( ) as u64 ) ;
263
+ self . short_write ( i, i as u16 as u64 ) ;
251
264
}
252
265
253
266
#[ inline]
254
267
fn write_i32 ( & mut self , i : i32 ) {
255
- self . short_write ( i, ( i as u32 ) . to_le ( ) as u64 ) ;
268
+ self . short_write ( i, i as u32 as u64 ) ;
256
269
}
257
270
258
271
#[ inline]
259
272
fn write_i64 ( & mut self , i : i64 ) {
260
- self . short_write ( i, ( i as u64 ) . to_le ( ) as u64 ) ;
273
+ self . short_write ( i, i as u64 ) ;
274
+ }
275
+
276
+ #[ inline]
277
+ fn write_i128 ( & mut self , i : i128 ) {
278
+ self . write ( & i. to_le_bytes ( ) ) ;
261
279
}
262
280
263
281
#[ inline]
264
282
fn write_isize ( & mut self , i : isize ) {
265
- self . short_write ( i, ( i as usize ) . to_le ( ) as u64 ) ;
283
+ // Always treat isize as i64 so we get the same results on 32- and 64-bit
284
+ // platforms. This is important for symbol hashes when cross compiling,
285
+ // for example. Sign extending here is preferable as it means that the
286
+ // same negative number hashes identically on both 32- and 64-bit platforms.
287
+ self . write_i64 ( i as i64 ) ;
266
288
}
267
289
268
290
#[ inline]
0 commit comments