From 6a6885c6bd1d44969ced14ab7f3ea9d543bf14a2 Mon Sep 17 00:00:00 2001 From: Cheng XU Date: Sat, 28 Aug 2021 17:17:27 -0700 Subject: [PATCH 1/3] add benchmark for BTreeMap::from_iter --- library/alloc/benches/btree/map.rs | 50 ++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/library/alloc/benches/btree/map.rs b/library/alloc/benches/btree/map.rs index 920a5ca7db067..c304f74884721 100644 --- a/library/alloc/benches/btree/map.rs +++ b/library/alloc/benches/btree/map.rs @@ -54,6 +54,50 @@ macro_rules! map_insert_seq_bench { }; } +macro_rules! map_from_iter_rand_bench { + ($name: ident, $n: expr, $map: ident) => { + #[bench] + pub fn $name(b: &mut Bencher) { + let n: usize = $n; + // setup + let mut rng = thread_rng(); + let mut vec = Vec::with_capacity(n); + + for _ in 0..n { + let i = rng.gen::() % n; + vec.push((i, i)); + } + + // measure + b.iter(|| { + let map: $map<_, _> = vec.iter().copied().collect(); + black_box(map); + }); + } + }; +} + +macro_rules! map_from_iter_seq_bench { + ($name: ident, $n: expr, $map: ident) => { + #[bench] + pub fn $name(b: &mut Bencher) { + let n: usize = $n; + // setup + let mut vec = Vec::with_capacity(n); + + for i in 0..n { + vec.push((i, i)); + } + + // measure + b.iter(|| { + let map: $map<_, _> = vec.iter().copied().collect(); + black_box(map); + }); + } + }; +} + macro_rules! map_find_rand_bench { ($name: ident, $n: expr, $map: ident) => { #[bench] @@ -111,6 +155,12 @@ map_insert_rand_bench! {insert_rand_10_000, 10_000, BTreeMap} map_insert_seq_bench! {insert_seq_100, 100, BTreeMap} map_insert_seq_bench! {insert_seq_10_000, 10_000, BTreeMap} +map_from_iter_rand_bench! {from_iter_rand_100, 100, BTreeMap} +map_from_iter_rand_bench! {from_iter_rand_10_000, 10_000, BTreeMap} + +map_from_iter_seq_bench! {from_iter_seq_100, 100, BTreeMap} +map_from_iter_seq_bench! {from_iter_seq_10_000, 10_000, BTreeMap} + map_find_rand_bench! {find_rand_100, 100, BTreeMap} map_find_rand_bench! 
{find_rand_10_000, 10_000, BTreeMap} From cf814d60f82723e5965763859c51b3e7bd885b9b Mon Sep 17 00:00:00 2001 From: Cheng XU Date: Sat, 28 Aug 2021 16:48:45 -0700 Subject: [PATCH 2/3] BTreeMap::from_iter: use bulk building to improve the performance Bulk building is a common technique to increase the performance of building a fresh btree map. Instead of inserting items one-by-one, we sort all the items beforehand then create the BTreeMap in bulk. --- .../collections/btree/dedup_sorted_iter.rs | 47 +++++++++++++++++++ library/alloc/src/collections/btree/map.rs | 36 ++++++++++++-- library/alloc/src/collections/btree/mod.rs | 1 + 3 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 library/alloc/src/collections/btree/dedup_sorted_iter.rs diff --git a/library/alloc/src/collections/btree/dedup_sorted_iter.rs b/library/alloc/src/collections/btree/dedup_sorted_iter.rs new file mode 100644 index 0000000000000..60bf83b8387c3 --- /dev/null +++ b/library/alloc/src/collections/btree/dedup_sorted_iter.rs @@ -0,0 +1,47 @@ +use core::iter::Peekable; + +/// An iterator for deduping the key of a sorted iterator. +/// When encountering a duplicated key, only the last key-value pair is yielded. +/// +/// Used by [`BTreeMap::bulk_build_from_sorted_iter`]. 
+pub struct DedupSortedIter +where + I: Iterator, +{ + iter: Peekable, +} + +impl DedupSortedIter +where + I: Iterator, +{ + pub fn new(iter: I) -> Self { + Self { iter: iter.peekable() } + } +} + +impl Iterator for DedupSortedIter +where + K: Eq, + I: Iterator, +{ + type Item = (K, V); + + fn next(&mut self) -> Option<(K, V)> { + loop { + let next = match self.iter.next() { + Some(next) => next, + None => return None, + }; + + let peeked = match self.iter.peek() { + Some(peeked) => peeked, + None => return Some(next), + }; + + if next.0 != peeked.0 { + return Some(next); + } + } + } +} diff --git a/library/alloc/src/collections/btree/map.rs b/library/alloc/src/collections/btree/map.rs index 4b649e43371de..5e60851aec8c9 100644 --- a/library/alloc/src/collections/btree/map.rs +++ b/library/alloc/src/collections/btree/map.rs @@ -1,3 +1,4 @@ +use crate::vec::Vec; use core::borrow::Borrow; use core::cmp::Ordering; use core::fmt::{self, Debug}; @@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds}; use core::ptr; use super::borrow::DormantMutRef; +use super::dedup_sorted_iter::DedupSortedIter; use super::navigate::{LazyLeafRange, LeafRange}; use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root}; use super::search::SearchResult::*; @@ -1290,6 +1292,18 @@ impl BTreeMap { pub fn into_values(self) -> IntoValues { IntoValues { inner: self.into_iter() } } + + /// Makes a `BTreeMap` from a sorted iterator. 
+ pub(crate) fn bulk_build_from_sorted_iter(iter: I) -> Self + where + K: Ord, + I: Iterator, + { + let mut root = Root::new(); + let mut length = 0; + root.bulk_push(DedupSortedIter::new(iter), &mut length); + BTreeMap { root: Some(root), length } + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -1914,9 +1928,15 @@ impl FusedIterator for RangeMut<'_, K, V> {} #[stable(feature = "rust1", since = "1.0.0")] impl FromIterator<(K, V)> for BTreeMap { fn from_iter>(iter: T) -> BTreeMap { - let mut map = BTreeMap::new(); - map.extend(iter); - map + let mut inputs: Vec<_> = iter.into_iter().collect(); + + if inputs.is_empty() { + return BTreeMap::new(); + } + + // use stable sort to preserve the insertion order. + inputs.sort_by(|a, b| a.0.cmp(&b.0)); + BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter()) } } @@ -2025,8 +2045,14 @@ impl From<[(K, V); N]> for BTreeMap { /// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into(); /// assert_eq!(map1, map2); /// ``` - fn from(arr: [(K, V); N]) -> Self { - core::array::IntoIter::new(arr).collect() + fn from(mut arr: [(K, V); N]) -> Self { + if N == 0 { + return BTreeMap::new(); + } + + // use stable sort to preserve the insertion order. + arr.sort_by(|a, b| a.0.cmp(&b.0)); + BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr)) } } diff --git a/library/alloc/src/collections/btree/mod.rs b/library/alloc/src/collections/btree/mod.rs index f74172c7d976f..9571b3d594df8 100644 --- a/library/alloc/src/collections/btree/mod.rs +++ b/library/alloc/src/collections/btree/mod.rs @@ -1,5 +1,6 @@ mod append; mod borrow; +mod dedup_sorted_iter; mod fix; pub mod map; mod mem; From a03287bbf765ce7ac0e2ae9e64d8ade168ece301 Mon Sep 17 00:00:00 2001 From: Cheng XU Date: Sat, 28 Aug 2021 16:57:58 -0700 Subject: [PATCH 3/3] BTreeSet::from_iter: use bulk building to improve the performance Apply the same optimization as BTreeMap::from_iter. 
--- library/alloc/src/collections/btree/set.rs | 27 ++++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/library/alloc/src/collections/btree/set.rs b/library/alloc/src/collections/btree/set.rs index 0c268ad32b261..fca281a63bbe5 100644 --- a/library/alloc/src/collections/btree/set.rs +++ b/library/alloc/src/collections/btree/set.rs @@ -1,6 +1,7 @@ // This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface // to TreeMap +use crate::vec::Vec; use core::borrow::Borrow; use core::cmp::Ordering::{Equal, Greater, Less}; use core::cmp::{max, min}; @@ -1059,9 +1060,17 @@ impl BTreeSet { #[stable(feature = "rust1", since = "1.0.0")] impl FromIterator for BTreeSet { fn from_iter>(iter: I) -> BTreeSet { - let mut set = BTreeSet::new(); - set.extend(iter); - set + let mut inputs: Vec<_> = iter.into_iter().collect(); + + if inputs.is_empty() { + return BTreeSet::new(); + } + + // use stable sort to preserve the insertion order. + inputs.sort(); + let iter = inputs.into_iter().map(|k| (k, ())); + let map = BTreeMap::bulk_build_from_sorted_iter(iter); + BTreeSet { map } } } @@ -1074,8 +1083,16 @@ impl From<[T; N]> for BTreeSet { /// let set2: BTreeSet<_> = [1, 2, 3, 4].into(); /// assert_eq!(set1, set2); /// ``` - fn from(arr: [T; N]) -> Self { - core::array::IntoIter::new(arr).collect() + fn from(mut arr: [T; N]) -> Self { + if N == 0 { + return BTreeSet::new(); + } + + // use stable sort to preserve the insertion order. + arr.sort(); + let iter = core::array::IntoIter::new(arr).map(|k| (k, ())); + let map = BTreeMap::bulk_build_from_sorted_iter(iter); + BTreeSet { map } } }