From acc82dc50ec135496d643d31750629f76f791480 Mon Sep 17 00:00:00 2001
From: Konrad Borowski <konrad@borowski.pw>
Date: Sat, 14 Oct 2017 09:47:17 +0200
Subject: [PATCH] Split graphemes and grapheme_indices into two methods

s.extended_graphemes() is more readable than s.graphemes(true), as
you don't have to work out what `true` means at the call site. The new
methods are implemented as default trait methods in order to preserve
backward compatibility in case somebody has implemented
UnicodeSegmentation for their own types.
---
 src/lib.rs  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/test.rs |  8 +++++++
 2 files changed, 71 insertions(+)

diff --git a/src/lib.rs b/src/lib.rs
index 6f903c0..bc3204a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -107,6 +107,44 @@ pub trait UnicodeSegmentation {
     /// ```
     fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
 
+    /// Returns an iterator over the [legacy grapheme clusters][graphemes] of `self`.
+    ///
+    /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
+    ///
+    /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
+    /// recommends extended grapheme cluster boundaries for general processing.
+    #[inline]
+    fn legacy_graphemes<'a>(&'a self) -> Graphemes<'a> {
+        self.graphemes(false)
+    }
+
+    /// Returns an iterator over the [extended grapheme clusters][graphemes] of `self`.
+    ///
+    /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
+    ///
+    /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
+    /// recommends extended grapheme cluster boundaries for general processing.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use self::unicode_segmentation::UnicodeSegmentation;
+    /// let gr1 = UnicodeSegmentation::extended_graphemes("a\u{310}e\u{301}o\u{308}\u{332}")
+    ///           .collect::<Vec<&str>>();
+    /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
+    ///
+    /// assert_eq!(&gr1[..], b);
+    ///
+    /// let gr2 = UnicodeSegmentation::extended_graphemes("a\r\nb🇷🇺🇸🇹").collect::<Vec<&str>>();
+    /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺", "🇸🇹"];
+    ///
+    /// assert_eq!(&gr2[..], b);
+    /// ```
+    #[inline]
+    fn extended_graphemes<'a>(&'a self) -> Graphemes<'a> {
+        self.graphemes(true)
+    }
+
     /// Returns an iterator over the grapheme clusters of `self` and their
     /// byte offsets. See `graphemes()` for more information.
     ///
@@ -122,6 +160,31 @@ pub trait UnicodeSegmentation {
     /// ```
     fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
 
+    /// Returns an iterator over the legacy grapheme clusters of `self` and their
+    /// byte offsets. See `legacy_graphemes()` for more information.
+    #[inline]
+    fn legacy_grapheme_indices<'a>(&'a self) -> GraphemeIndices<'a> {
+        self.grapheme_indices(false)
+    }
+
+    /// Returns an iterator over the extended grapheme clusters of `self` and their
+    /// byte offsets. See `extended_graphemes()` for more information.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use self::unicode_segmentation::UnicodeSegmentation;
+    /// let gr_inds = UnicodeSegmentation::extended_grapheme_indices("a̐éö̲\r\n")
+    ///               .collect::<Vec<(usize, &str)>>();
+    /// let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
+    ///
+    /// assert_eq!(&gr_inds[..], b);
+    /// ```
+    #[inline]
+    fn extended_grapheme_indices<'a>(&'a self) -> GraphemeIndices<'a> {
+        self.grapheme_indices(true)
+    }
+
     /// Returns an iterator over the words of `self`, separated on
     /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries).
     ///
diff --git a/src/test.rs b/src/test.rs
index 54493fe..4deb69f 100644
--- a/src/test.rs
+++ b/src/test.rs
@@ -44,20 +44,28 @@ fn test_graphemes() {
         // test forward iterator
         assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned()));
         assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned()));
+        assert!(UnicodeSegmentation::extended_graphemes(s).eq(g.iter().cloned()));
+        assert!(UnicodeSegmentation::legacy_graphemes(s).eq(g.iter().cloned()));
 
         // test reverse iterator
         assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(g.iter().rev().cloned()));
         assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(g.iter().rev().cloned()));
+        assert!(UnicodeSegmentation::extended_graphemes(s).rev().eq(g.iter().rev().cloned()));
+        assert!(UnicodeSegmentation::legacy_graphemes(s).rev().eq(g.iter().rev().cloned()));
     }
 
     for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
         // test forward iterator
         assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned()));
         assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned()));
+        assert!(UnicodeSegmentation::extended_graphemes(s).eq(gt.iter().cloned()));
+        assert!(UnicodeSegmentation::legacy_graphemes(s).eq(gf.iter().cloned()));
 
         // test reverse iterator
         assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(gt.iter().rev().cloned()));
         assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(gf.iter().rev().cloned()));
+        assert!(UnicodeSegmentation::extended_graphemes(s).rev().eq(gt.iter().rev().cloned()));
+        assert!(UnicodeSegmentation::legacy_graphemes(s).rev().eq(gf.iter().rev().cloned()));
     }
 
     // test the indices iterators