Skip to content

Commit 0127ceb

Browse files
committed
Code for gathering stats about extremely slow HashMap operations
1 parent 76c3875 commit 0127ceb

File tree

5 files changed

+65
-2
lines changed

5 files changed

+65
-2
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,4 @@ repository = "https://github.com/carllerche/hashmap2"
3636
homepage = "https://github.com/carllerche/hashmap2"
3737

3838
[dependencies]
39-
rand = "0.3.12"
39+
rand = "0.3.15"

examples/stats.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
extern crate hashmap2;
2+
extern crate rand;
3+
4+
use hashmap2::HashMap;
5+
use rand::Rng;
6+
7+
fn main() {
8+
let mut map: HashMap<i32, ()> = HashMap::new();
9+
assert_eq!(map.len(), 0);
10+
let mut rng = rand::weak_rng();
11+
let mut iter = rng.gen_iter();
12+
let len = 2 << 20;
13+
let usable_cap = (len as f32 * 0.833) as usize;
14+
let mut stats = vec![];
15+
for _ in 0..10000 {
16+
while map.len() < usable_cap {
17+
map.insert(iter.next().unwrap(), ());
18+
}
19+
map.stats(&mut stats);
20+
map.clear();
21+
}
22+
for (i, (displacement, forward_shift)) in stats.into_iter().enumerate() {
23+
println!("{}: {}\t{}", i, displacement, forward_shift);
24+
}
25+
println!("map len={:?} capacity={:?}", map.len(), map.capacity());
26+
}

src/adaptive_map.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use HashMap;
2222

2323
// Beyond this displacement, we switch to safe hashing or grow the table.
2424
const DISPLACEMENT_THRESHOLD: usize = 128;
25-
const FORWARD_SHIFT_THRESHOLD: usize = 512;
25+
const FORWARD_SHIFT_THRESHOLD: usize = 1024;
2626
// When the map's load factor is below this threshold, we switch to safe hashing.
2727
// Otherwise, we grow the table.
2828
// const LOAD_FACTOR_THRESHOLD: f32 = 0.625;

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,10 @@ impl<K, V, S> HashMap<K, V, S>
12391239

12401240
self.search_mut(k).into_occupied_bucket().map(|bucket| pop_internal(bucket))
12411241
}
1242+
1243+
pub fn stats(&self, stats: &mut Vec<(u64, u64)>) {
1244+
self.table.stats(stats);
1245+
}
12421246
}
12431247

12441248
// Not copying this requires specialization

src/table.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,39 @@ fn test_offset_calculation() {
590590
}
591591

592592
impl<K, V> RawTable<K, V> {
593+
pub fn stats(&self, stats: &mut Vec<(u64, u64)>) {
594+
if stats.is_empty() {
595+
*stats = vec![(0, 0); 2000];
596+
}
597+
// stats.clear();
598+
// stats.extend(iter::repeat((0, 0)).take(1000));
599+
let mut iter = self.raw_buckets();
600+
let first_hash = iter.raw.hash;
601+
let mut latest_hash = iter.raw.hash;
602+
let mut chunk_info = vec![];
603+
while let Some(raw) = iter.next() {
604+
let num_empty = (raw.hash as usize - latest_hash as usize) / size_of::<u64>();
605+
let idx = (raw.hash as usize - first_hash as usize) / size_of::<u64>();
606+
stats[0].0 += num_empty as u64;
607+
stats[0].1 += num_empty as u64;
608+
if num_empty > 0 {
609+
for n in chunk_info.drain(..) {
610+
stats[n as usize].1 += 1;
611+
}
612+
} else {
613+
for n in chunk_info.iter_mut() {
614+
*n += 1;
615+
}
616+
}
617+
chunk_info.push(0);
618+
let ib = unsafe {
619+
(*raw.hash) as usize & (self.capacity - 1)
620+
};
621+
let displacement = (idx as isize - ib as isize) as usize & (self.capacity - 1);
622+
stats[displacement].0 += 1;
623+
latest_hash = iter.raw.hash;
624+
}
625+
}
593626
/// Does not initialize the buckets. The caller should ensure they,
594627
/// at the very least, set every hash to EMPTY_BUCKET.
595628
unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {

0 commit comments

Comments
 (0)