Skip to content

Commit b4a2b29

Browse files
committed
Added Itertools::dedup_with_count() and Itertools::dedup_by_with_count().
Fixes #393.
1 parent 417b85d commit b4a2b29

File tree

3 files changed

+157
-2
lines changed

3 files changed

+157
-2
lines changed

src/adaptors/mod.rs

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub use self::multi_product::*;
1010

1111
use std::fmt;
1212
use std::mem::replace;
13-
use std::iter::{Fuse, Peekable, FromIterator};
13+
use std::iter::{Fuse, Peekable, FromIterator, FusedIterator};
1414
use std::marker::PhantomData;
1515
use crate::size_hint;
1616

@@ -803,6 +803,88 @@ impl<I, Pred> Iterator for DedupBy<I, Pred>
803803
}
804804
}
805805

806+
/// Iterator adaptor that collapses every run of consecutive duplicates into a single element
/// and pairs that element with the length of its run. Whether two neighbouring elements are
/// duplicates is decided by a comparison function.
///
/// See [`.dedup_by_with_count()`](../trait.Itertools.html#method.dedup_by_with_count) or
/// [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct DedupByWithCount<I: Iterator, Pred> {
    // Source iterator, wrapped so the upcoming element can be inspected without consuming it.
    iter: Peekable<I>,
    // Comparison used to decide whether two adjacent elements are duplicates.
    dedup_pred: Pred,
}
818+
819+
/// An iterator adaptor that removes repeated duplicates, while keeping a count of how many
820+
/// repeated elements were present.
821+
///
822+
/// See [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
823+
pub type DedupWithCount<I> = DedupByWithCount<I, DedupEq>;
824+
825+
/// Create a new `DedupByWithCount`.
826+
pub fn dedup_by_with_count<I, Pred>(iter: I, dedup_pred: Pred) -> DedupByWithCount<I, Pred>
827+
where I: Iterator,
828+
{
829+
DedupByWithCount {
830+
iter: iter.peekable(),
831+
dedup_pred,
832+
}
833+
}
834+
835+
/// Create a new `DedupWithCount`.
836+
pub fn dedup_with_count<I>(iter: I) -> DedupWithCount<I>
837+
where I: Iterator
838+
{
839+
dedup_by_with_count(iter, DedupEq)
840+
}
841+
842+
impl<I, Pred> fmt::Debug for DedupByWithCount<I, Pred>
843+
where I: Iterator + fmt::Debug,
844+
I::Item: fmt::Debug,
845+
{
846+
debug_fmt_fields!(Dedup, iter);
847+
}
848+
849+
impl<I: Clone, Pred: Clone> Clone for DedupByWithCount<I, Pred>
850+
where I: Iterator,
851+
I::Item: Clone,
852+
{
853+
clone_fields!(iter, dedup_pred);
854+
}
855+
856+
impl<I, Pred> Iterator for DedupByWithCount<I, Pred>
857+
where I: Iterator,
858+
Pred: DedupPredicate<I::Item>,
859+
{
860+
type Item = (usize, I::Item);
861+
862+
fn next(&mut self) -> Option<(usize, I::Item)> {
863+
self.iter.next().map(|base| {
864+
let mut count: usize = 1;
865+
866+
while let Some(next) = self.iter.peek() {
867+
if self.dedup_pred.dedup_pair(&base, next) {
868+
self.iter.next();
869+
count += 1;
870+
} else {
871+
break;
872+
}
873+
}
874+
875+
(count, base)
876+
})
877+
}
878+
879+
fn size_hint(&self) -> (usize, Option<usize>) {
880+
let (low, high) = self.iter.size_hint();
881+
882+
((low > 0) as usize, high)
883+
}
884+
}
885+
886+
impl<I: Iterator, Pred: DedupPredicate<I::Item>> FusedIterator for DedupByWithCount<I, Pred> {}
887+
806888
/// An iterator adaptor that borrows from a `Clone`-able iterator
807889
/// to only pick off elements while the predicate returns `true`.
808890
///

src/lib.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ pub mod structs {
7878
pub use crate::adaptors::{
7979
Dedup,
8080
DedupBy,
81+
DedupWithCount,
82+
DedupByWithCount,
8183
Interleave,
8284
InterleaveShortest,
8385
Product,
@@ -970,7 +972,7 @@ pub trait Itertools : Iterator {
970972
/// use itertools::Itertools;
971973
///
972974
/// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
973-
/// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1==y.1),
975+
/// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1 == y.1),
974976
/// vec![(0, 1.), (0, 2.), (0, 3.), (1, 2.)]);
975977
/// ```
976978
fn dedup_by<Cmp>(self, cmp: Cmp) -> DedupBy<Self, Cmp>
@@ -980,6 +982,50 @@ pub trait Itertools : Iterator {
980982
adaptors::dedup_by(self, cmp)
981983
}
982984

985+
/// Remove duplicates from sections of consecutive identical elements, while keeping a count of
986+
/// how many repeated elements were present.
987+
/// If the iterator is sorted, all elements will be unique.
988+
///
989+
/// Iterator element type is `(usize, Self::Item)`.
990+
///
991+
/// This iterator is *fused*.
992+
///
993+
/// ```
994+
/// use itertools::Itertools;
995+
///
996+
/// let data = vec![1., 1., 2., 3., 3., 2., 2.];
997+
/// itertools::assert_equal(data.into_iter().dedup_with_count(),
998+
/// vec![(2, 1.), (1, 2.), (2, 3.), (2, 2.)]);
999+
/// ```
1000+
fn dedup_with_count(self) -> DedupWithCount<Self>
1001+
where Self: Sized,
1002+
{
1003+
adaptors::dedup_with_count(self)
1004+
}
1005+
1006+
/// Remove duplicates from sections of consecutive identical elements, while keeping a count of
1007+
/// how many repeated elements were present.
1008+
/// This will determine equality using a comparison function.
1009+
/// If the iterator is sorted, all elements will be unique.
1010+
///
1011+
/// Iterator element type is `(usize, Self::Item)`.
1012+
///
1013+
/// This iterator is *fused*.
1014+
///
1015+
/// ```
1016+
/// use itertools::Itertools;
1017+
///
1018+
/// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
1019+
/// itertools::assert_equal(data.into_iter().dedup_by_with_count(|x, y| x.1 == y.1),
1020+
/// vec![(2, (0, 1.)), (1, (0, 2.)), (2, (0, 3.)), (2, (1, 2.))]);
1021+
/// ```
1022+
fn dedup_by_with_count<Cmp>(self, cmp: Cmp) -> DedupByWithCount<Self, Cmp>
1023+
where Self: Sized,
1024+
Cmp: FnMut(&Self::Item, &Self::Item) -> bool,
1025+
{
1026+
adaptors::dedup_by_with_count(self, cmp)
1027+
}
1028+
9831029
/// Return an iterator adaptor that filters out elements that have
9841030
/// already been produced once during the iteration. Duplicates
9851031
/// are detected using hash and equality.

tests/test_std.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,33 @@ fn dedup_by() {
114114
assert_eq!(&xs_d, &ys);
115115
}
116116

117+
#[test]
fn dedup_with_count() {
    // Runs of mixed length; the value `1` shows up in two separate runs.
    let input: [i32; 8] = [0, 1, 1, 1, 2, 1, 3, 3];
    let expected: [(usize, &i32); 5] = [(1, &0), (3, &1), (1, &2), (1, &1), (2, &3)];
    it::assert_equal(expected.iter().cloned(), input.iter().dedup_with_count());

    // The whole input is a single run.
    let input: [i32; 5] = [0, 0, 0, 0, 0];
    let expected: [(usize, &i32); 1] = [(5, &0)];
    it::assert_equal(expected.iter().cloned(), input.iter().dedup_with_count());
}
129+
130+
131+
#[test]
fn dedup_by_with_count() {
    // Elements are compared on their second component only.
    let input = [(0, 0), (0, 1), (1, 1), (2, 1), (0, 2), (3, 1), (0, 3), (1, 3)];
    let expected = [(1, &(0, 0)), (3, &(0, 1)), (1, &(0, 2)), (1, &(3, 1)), (2, &(0, 3))];
    it::assert_equal(
        expected.iter().cloned(),
        input.iter().dedup_by_with_count(|x, y| x.1 == y.1),
    );

    // Comparing on the first component makes the whole input a single run.
    let input = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)];
    let expected = [(5, &(0, 1))];
    it::assert_equal(
        expected.iter().cloned(),
        input.iter().dedup_by_with_count(|x, y| x.0 == y.0),
    );
}
143+
117144
#[test]
118145
fn all_equal() {
119146
assert!("".chars().all_equal());

0 commit comments

Comments
 (0)