rust-lang · logannc · Oct 28, 2021 · Oct 28, 2021 · Oct 28, 2021 · Oct 28, 2021
diff --git a/library/alloc/tests/lib.rs b/library/alloc/tests/lib.rs
@@ -25,6 +25,7 @@
 #![feature(const_btree_new)]
 #![feature(const_default_impls)]
 #![feature(const_trait_impl)]
+#![feature(split_rinclusive)]
 
 use std::collections::hash_map::DefaultHasher;
 use std::hash::{Hash, Hasher};

diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs
@@ -1325,6 +1325,21 @@ fn test_split_char_iterator_inclusive() {
     assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]);
 }
 
+
+#[test]
+fn test_split_char_iterator_rinclusive() {
+    // A `char` pattern: every '\n' opens the piece that follows it.
+    let text = "\nMäry häd ä little lämb\nLittle lämb\n";
+    let pieces = text.split_rinclusive('\n').collect::<Vec<&str>>();
+    assert_eq!(pieces, ["\nMäry häd ä little lämb", "\nLittle lämb", "\n"]);
+
+    // A predicate pattern: every uppercase letter opens a new piece.
+    let camel_case = "SheepSharkTurtleCat";
+    let words = camel_case.split_rinclusive(char::is_uppercase).collect::<Vec<&str>>();
+    assert_eq!(words, ["Sheep", "Shark", "Turtle", "Cat"]);
+}
+
 #[test]
 fn test_split_char_iterator_inclusive_rev() {
     let data = "\nMäry häd ä little lämb\nLittle lämb\n";
@@ -1349,6 +1364,21 @@ fn test_split_char_iterator_inclusive_rev() {
     assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]);
 }
 
+#[test]
+fn test_split_char_iterator_rinclusive_rev() {
+    // Iterating from the back is expected to give the forward pieces in reverse order.
+    let text = "\nMäry häd ä little lämb\nLittle lämb\n";
+    let pieces = text.split_rinclusive('\n').rev().collect::<Vec<&str>>();
+    assert_eq!(pieces, ["\n", "\nLittle lämb", "\nMäry häd ä little lämb"]);
+
+    // Same expectation with a predicate pattern.
+    let camel_case = "SheepSharkTurtleCat";
+    let words = camel_case.split_rinclusive(char::is_uppercase).rev().collect::<Vec<&str>>();
+    assert_eq!(words, ["Cat", "Turtle", "Shark", "Sheep"]);
+}
+
 #[test]
 fn test_rsplit() {
     let data = "\nMäry häd ä little lämb\nLittle lämb\n";

diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
@@ -564,6 +564,7 @@ pub(super) struct SplitInternal<'a, P: Pattern<'a>> {
     pub(super) end: usize,
     pub(super) matcher: P::Searcher,
     pub(super) allow_trailing_empty: bool,
+    pub(super) allow_leading_empty: bool,
     pub(super) finished: bool,
 }
 
@@ -577,6 +578,7 @@ where
             .field("end", &self.end)
             .field("matcher", &self.matcher)
             .field("allow_trailing_empty", &self.allow_trailing_empty)
+            .field("allow_leading_empty", &self.allow_leading_empty)
             .field("finished", &self.finished)
             .finish()
     }
@@ -603,6 +605,18 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
             return None;
         }
 
+        if !self.allow_leading_empty {
+            self.allow_leading_empty = true;
+            match self.next() {
+                Some(elt) if !elt.is_empty() => return Some(elt),
+                _ => {
+                    if self.finished {
+                        return None;
+                    }
+                }
+            }
+        }
+
         let haystack = self.matcher.haystack();
         match self.matcher.next_match() {
             // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
@@ -635,6 +649,38 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
         }
     }
 
+    /// Forward step for "match starts a new piece" splitting: returns the text from
+    /// `self.start` up to (but not including) the next match and leaves the match
+    /// itself at the front of the following piece.
+    #[inline]
+    fn next_rinclusive(&mut self) -> Option<&'a str> {
+        if self.finished {
+            return None;
+        }
+
+        // Leading-empty suppression: the first time through with the flag cleared,
+        // set the flag and take one ordinary step. A non-empty piece is returned as
+        // is; an empty piece (or `None`) is discarded and, unless that step finished
+        // the iterator, the code below takes a second step.
+        if !self.allow_leading_empty {
+            self.allow_leading_empty = true;
+            match self.next_rinclusive() {
+                Some(first) if !first.is_empty() => return Some(first),
+                _ if self.finished => return None,
+                _ => {}
+            }
+        }
+
+        let haystack = self.matcher.haystack();
+        if let Some((match_start, _)) = self.matcher.next_match() {
+            let piece_start = self.start;
+            // The next piece begins at the match, so the match is not consumed here.
+            self.start = match_start;
+            // SAFETY: `Searcher` guarantees that `match_start` lies on a unicode boundary,
+            // and `piece_start` is either the start of the original string or an earlier
+            // `match_start`, so it lies on a unicode boundary as well.
+            unsafe { Some(haystack.get_unchecked(piece_start..match_start)) }
+        } else {
+            self.get_end()
+        }
+    }
+
     #[inline]
     fn next_back(&mut self) -> Option<&'a str>
     where
@@ -715,6 +761,37 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
         }
     }
 
+    /// Backward step for "match starts a new piece" splitting: returns the text from
+    /// the last remaining match (inclusive) up to `self.end`, then moves `self.end`
+    /// back to the start of that match.
+    ///
+    /// Once no match is left, the remaining range `self.start..self.end` is returned
+    /// as the final piece, unless it is empty.
+    #[inline]
+    fn next_back_rinclusive(&mut self) -> Option<&'a str>
+    where
+        P::Searcher: ReverseSearcher<'a>,
+    {
+        if self.finished {
+            return None;
+        }
+
+        let haystack = self.matcher.haystack();
+        match self.matcher.next_match_back() {
+            // SAFETY: `Searcher` guarantees that `a` lies on a unicode boundary,
+            // and `self.end` is either the end of the original string or an `a`
+            // assigned by an earlier call, so it also lies on a unicode boundary.
+            Some((a, _)) => unsafe {
+                let elt = haystack.get_unchecked(a..self.end);
+                self.end = a;
+                Some(elt)
+            },
+            None => {
+                self.finished = true;
+                // An empty remainder means the piece returned last began exactly at
+                // `self.start` (e.g. the haystack starts with a match), or that the
+                // haystack is empty. `split_rinclusive` omits the leading empty
+                // string, so nothing more is yielded. The range is tested directly
+                // rather than `allow_leading_empty`, because `next_rinclusive` sets
+                // that flag as soon as it is called.
+                if self.start == self.end {
+                    return None;
+                }
+                // SAFETY: `self.start` is either the start of the original string or a
+                // match start assigned by `next_rinclusive`, and `self.end` is either the
+                // end of the original string or a match start assigned above; the
+                // `Searcher` guarantees match starts lie on unicode boundaries.
+                unsafe { Some(haystack.get_unchecked(self.start..self.end)) }
+            }
+        }
+    }
+
     #[inline]
     fn as_str(&self) -> &'a str {
         // `Self::get_end` doesn't change `self.start`
@@ -1376,6 +1453,78 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> {
     }
 }
 
+/// An iterator over the substrings of a string, where each match of a
+/// pattern begins a new substring.
+///
+/// Unlike `Split`, the matched part is kept as the start of each
+/// substring, except for the first substring, which holds the contents
+/// up to the first match.
+///
+/// This struct is created by the [`split_rinclusive`] method on [`str`].
+/// See its documentation for more.
+///
+/// [`split_rinclusive`]: str::split_rinclusive
+#[unstable(feature = "split_rinclusive", issue = "none")]
+pub struct SplitRInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>);
+
+// Forward iteration: every call is forwarded to the wrapped `SplitInternal`.
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a>> Iterator for SplitRInclusive<'a, P> {
+    type Item = &'a str;
+
+    // Yields the next substring from the front via `SplitInternal::next_rinclusive`.
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        self.0.next_rinclusive()
+    }
+}
+
+// Hand-written `Debug`: bounded on `P::Searcher: Debug` and prints a struct named
+// `SplitRInclusive` whose single field `0` is the wrapped `SplitInternal`.
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitRInclusive<'a, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("SplitRInclusive").field("0", &self.0).finish()
+    }
+}
+
+// FIXME(#26925) Remove in favor of `#[derive(Clone)]`
+// Hand-written `Clone`: bounded on `P::Searcher: Clone` and clones the wrapped
+// `SplitInternal`.
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitRInclusive<'a, P> {
+    fn clone(&self) -> Self {
+        SplitRInclusive(self.0.clone())
+    }
+}
+
+// Backward iteration: only available when the pattern's searcher implements
+// `ReverseSearcher`.
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator
+    for SplitRInclusive<'a, P>
+{
+    // Yields the next substring from the back via `SplitInternal::next_back_rinclusive`.
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a str> {
+        self.0.next_back_rinclusive()
+    }
+}
+
+// Marker impl with no methods: declares `SplitRInclusive` to be a `FusedIterator`.
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a>> FusedIterator for SplitRInclusive<'a, P> {}
+
+impl<'a, P: Pattern<'a>> SplitRInclusive<'a, P> {
+    /// Returns the remainder of the split string.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(split_rinclusive)]
+    /// let mut split = "Mary had a little lamb".split_rinclusive(' ');
+    /// assert_eq!(split.as_str(), "Mary had a little lamb");
+    /// split.next();
+    /// assert_eq!(split.as_str(), " had a little lamb");
+    /// split.by_ref().for_each(drop);
+    /// assert_eq!(split.as_str(), "");
+    /// ```
+    #[inline]
+    #[unstable(feature = "split_rinclusive", issue = "none")]
+    pub fn as_str(&self) -> &'a str {
+        // Delegates to the wrapped `SplitInternal`.
+        self.0.as_str()
+    }
+}
+
 /// An iterator of [`u16`] over the string encoded as UTF-16.
 ///
 /// This struct is created by the [`encode_utf16`] method on [`str`].

diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
@@ -68,6 +68,9 @@ pub use iter::SplitAsciiWhitespace;
 #[stable(feature = "split_inclusive", since = "1.51.0")]
 pub use iter::SplitInclusive;
 
+#[unstable(feature = "split_rinclusive", issue = "none")]
+pub use iter::SplitRInclusive;
+
 #[unstable(feature = "str_internals", issue = "none")]
 pub use validations::{next_code_point, utf8_char_width};
 
@@ -1230,6 +1233,7 @@ impl str {
             end: self.len(),
             matcher: pat.into_searcher(self),
             allow_trailing_empty: true,
+            allow_leading_empty: true,
             finished: false,
         })
     }
@@ -1270,6 +1274,58 @@ impl str {
             end: self.len(),
             matcher: pat.into_searcher(self),
             allow_trailing_empty: false,
+            allow_leading_empty: true,
+            finished: false,
+        })
+    }
+
+    /// An iterator over substrings of this string slice, split at the
+    /// positions where a pattern matches. Differs from the iterator produced
+    /// by `split` in that `split_rinclusive` leaves the matched part as the
+    /// beginning of the next substring. The first substring is whatever
+    /// precedes the first match.
+    ///
+    /// Put another way, a match is the start of a new substring.
+    ///
+    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
+    /// function or closure that determines if a character matches.
+    ///
+    /// [`char`]: prim@char
+    /// [pattern]: self::pattern
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(split_rinclusive)]
+    /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
+    ///     .split_rinclusive('\n').collect();
+    /// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb."]);
+    /// ```
+    ///
+    /// If the first element of the string is matched,
+    /// the leading empty string is omitted.
+    ///
+    /// ```
+    /// #![feature(split_rinclusive)]
+    /// let v: Vec<&str> = "MaryHadALittleLamb"
+    ///     .split_rinclusive(char::is_uppercase).collect();
+    /// assert_eq!(v, ["Mary", "Had", "A", "Little", "Lamb"]);
+    /// ```
+    ///
+    /// If the last element of the string is matched,
+    /// that element will be considered the final substring returned by the iterator.
+    ///
+    /// ```
+    /// #![feature(split_rinclusive)]
+    /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
+    ///     .split_rinclusive('\n').collect();
+    /// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb.", "\n"]);
+    /// ```
+    #[unstable(feature = "split_rinclusive", issue = "none")]
+    #[inline]
+    pub fn split_rinclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitRInclusive<'a, P> {
+        // Both flags are cleared: neither a leading nor a trailing empty piece is wanted.
+        SplitRInclusive(SplitInternal {
+            start: 0,
+            end: self.len(),
+            matcher: pat.into_searcher(self),
+            allow_trailing_empty: false,
+            allow_leading_empty: false,
             finished: false,
         })
     }