Skip to content

Add unstable feature 'split_rinclusive', adding a right-inclusive version of str::split_inclusive #90388

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/alloc/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#![feature(const_btree_new)]
#![feature(const_default_impls)]
#![feature(const_trait_impl)]
#![feature(split_rinclusive)]

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
Expand Down
30 changes: 30 additions & 0 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,21 @@ fn test_split_char_iterator_inclusive() {
assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]);
}


#[test]
fn test_split_char_iterator_rinclusive() {
    // A `char` pattern: every newline opens a new piece, and the empty piece
    // in front of the leading newline is not reported.
    let text = "\nMäry häd ä little lämb\nLittle lämb\n";
    let pieces = text.split_rinclusive('\n').collect::<Vec<&str>>();
    assert_eq!(pieces, ["\nMäry häd ä little lämb", "\nLittle lämb", "\n"]);

    // A predicate pattern: every uppercase letter opens a new piece.
    let words = "SheepSharkTurtleCat";
    let pieces = words.split_rinclusive(char::is_uppercase).collect::<Vec<&str>>();
    assert_eq!(pieces, ["Sheep", "Shark", "Turtle", "Cat"]);
}

#[test]
fn test_split_char_iterator_inclusive_rev() {
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
Expand All @@ -1349,6 +1364,21 @@ fn test_split_char_iterator_inclusive_rev() {
assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]);
}

#[test]
fn test_split_char_iterator_rinclusive_rev() {
    // Reverse iteration must produce the forward pieces in opposite order,
    // without an extra empty piece for the leading newline.
    let text = "\nMäry häd ä little lämb\nLittle lämb\n";
    let pieces = text.split_rinclusive('\n').rev().collect::<Vec<&str>>();
    assert_eq!(pieces, ["\n", "\nLittle lämb", "\nMäry häd ä little lämb"]);

    // Same check with a predicate pattern.
    let words = "SheepSharkTurtleCat";
    let pieces = words.split_rinclusive(char::is_uppercase).rev().collect::<Vec<&str>>();
    assert_eq!(pieces, ["Cat", "Turtle", "Shark", "Sheep"]);
}

#[test]
fn test_rsplit() {
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
Expand Down
149 changes: 149 additions & 0 deletions library/core/src/str/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ pub(super) struct SplitInternal<'a, P: Pattern<'a>> {
pub(super) end: usize,
pub(super) matcher: P::Searcher,
pub(super) allow_trailing_empty: bool,
pub(super) allow_leading_empty: bool,
pub(super) finished: bool,
}

Expand All @@ -577,6 +578,7 @@ where
.field("end", &self.end)
.field("matcher", &self.matcher)
.field("allow_trailing_empty", &self.allow_trailing_empty)
.field("allow_leading_empty", &self.allow_leading_empty)
.field("finished", &self.finished)
.finish()
}
Expand All @@ -603,6 +605,18 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
return None;
}

if !self.allow_leading_empty {
self.allow_leading_empty = true;
match self.next() {
Some(elt) if !elt.is_empty() => return Some(elt),
_ => {
if self.finished {
return None;
}
}
}
}

let haystack = self.matcher.haystack();
match self.matcher.next_match() {
// SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
Expand Down Expand Up @@ -635,6 +649,38 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
}
}

/// Forward step of the right-inclusive split: returns the text between the
/// current position and the start of the next match, leaving the match
/// itself as the beginning of the following piece.
#[inline]
fn next_rinclusive(&mut self) -> Option<&'a str> {
    if self.finished {
        return None;
    }

    // Runs on the first call only when leading empties are disallowed: fetch
    // one piece through the normal path and discard it if it is empty (the
    // haystack began with a match).
    if !self.allow_leading_empty {
        self.allow_leading_empty = true;
        match self.next_rinclusive() {
            Some(first) if !first.is_empty() => return Some(first),
            _ if self.finished => return None,
            _ => {}
        }
    }

    let haystack = self.matcher.haystack();
    if let Some((match_start, _)) = self.matcher.next_match() {
        let piece_start = self.start;
        // The match is not consumed: it becomes the head of the next piece.
        self.start = match_start;
        // SAFETY: `Searcher` guarantees that `match_start` lies on a unicode
        // boundary, and `piece_start` is either the start of the original
        // string or an earlier match start, so it lies on a boundary too.
        Some(unsafe { haystack.get_unchecked(piece_start..match_start) })
    } else {
        self.get_end()
    }
}

#[inline]
fn next_back(&mut self) -> Option<&'a str>
where
Expand Down Expand Up @@ -715,6 +761,37 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
}
}

/// Backward step of the right-inclusive split: returns the piece that
/// starts at the last remaining match and runs to the current end.
///
/// When no match remains, the rest of the unvisited range is returned as
/// the final piece, unless that range is empty. An empty remainder occurs
/// when the haystack begins with a match (or is empty); suppressing it
/// keeps reverse iteration from yielding a trailing `""`.
#[inline]
fn next_back_rinclusive(&mut self) -> Option<&'a str>
where
    P::Searcher: ReverseSearcher<'a>,
{
    if self.finished {
        return None;
    }

    let haystack = self.matcher.haystack();
    match self.matcher.next_match_back() {
        // SAFETY: `Searcher` guarantees that `a` lies on unicode boundary,
        // and self.end is either the end of the original string,
        // or `a` was assigned to it, so it also lies on unicode boundary.
        Some((a, _)) => unsafe {
            let elt = haystack.get_unchecked(a..self.end);
            // The match belongs to the piece just returned, so the
            // remaining range ends where the match starts.
            self.end = a;
            Some(elt)
        },
        None => {
            self.finished = true;
            // The previous match started exactly at `self.start`, so nothing
            // is left; do not emit an empty leading piece.
            if self.start == self.end {
                return None;
            }
            // SAFETY: self.start is either the start of the original string,
            // or the start of the part of the string that has not been
            // iterated yet. Either way, it lies on a unicode boundary.
            // self.end is either the end of the original string,
            // or `a` was assigned to it, so it also lies on unicode boundary.
            unsafe { Some(haystack.get_unchecked(self.start..self.end)) }
        }
    }
}

#[inline]
fn as_str(&self) -> &'a str {
// `Self::get_end` doesn't change `self.start`
Expand Down Expand Up @@ -1376,6 +1453,78 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> {
}
}

/// An iterator over the substrings of a string, where every match of a
/// pattern begins a new substring.
///
/// Unlike `Split`, the matched part is kept: it forms the start of each
/// substring, except for the first one, which holds the contents up until
/// the first match.
///
/// This struct is created by the [`split_rinclusive`] method on [`str`].
/// See its documentation for more.
///
/// [`split_rinclusive`]: str::split_rinclusive
#[unstable(feature = "split_rinclusive", issue = "none")]
pub struct SplitRInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>);

#[unstable(feature = "split_rinclusive", issue = "none")]
impl<'a, P: Pattern<'a>> Iterator for SplitRInclusive<'a, P> {
    type Item = &'a str;

    /// Yields the next substring from the front by delegating to the
    /// wrapped `SplitInternal`.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        let Self(inner) = self;
        inner.next_rinclusive()
    }
}

#[unstable(feature = "split_rinclusive", issue = "none")]
impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitRInclusive<'a, P> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("SplitRInclusive").field("0", &self.0).finish()
}
}

// FIXME(#26925) Remove in favor of `#[derive(Clone)]`
#[unstable(feature = "split_rinclusive", issue = "none")]
impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitRInclusive<'a, P> {
    /// Clones the wrapped split state into a new iterator.
    fn clone(&self) -> Self {
        let inner = self.0.clone();
        SplitRInclusive(inner)
    }
}

#[unstable(feature = "split_rinclusive", issue = "none")]
impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator
    for SplitRInclusive<'a, P>
{
    /// Yields the next substring from the back by delegating to the
    /// wrapped `SplitInternal`.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str> {
        let Self(inner) = self;
        inner.next_back_rinclusive()
    }
}

// Marker impl: the wrapped `SplitInternal` returns `None` from its
// right-inclusive stepping methods once its `finished` flag is set.
#[unstable(feature = "split_rinclusive", issue = "none")]
impl<'a, P: Pattern<'a>> FusedIterator for SplitRInclusive<'a, P> {}

impl<'a, P: Pattern<'a>> SplitRInclusive<'a, P> {
    /// Returns the remainder of the split string.
    ///
    /// # Examples
    ///
    /// ```
    /// #![feature(split_rinclusive)]
    /// let mut split = "Mary had a little lamb".split_rinclusive(' ');
    /// assert_eq!(split.as_str(), "Mary had a little lamb");
    /// split.next();
    /// assert_eq!(split.as_str(), " had a little lamb");
    /// split.by_ref().for_each(drop);
    /// assert_eq!(split.as_str(), "");
    /// ```
    #[inline]
    #[unstable(feature = "split_rinclusive", issue = "none")]
    pub fn as_str(&self) -> &'a str {
        self.0.as_str()
    }
}

/// An iterator of [`u16`] over the string encoded as UTF-16.
///
/// This struct is created by the [`encode_utf16`] method on [`str`].
Expand Down
56 changes: 56 additions & 0 deletions library/core/src/str/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ pub use iter::SplitAsciiWhitespace;
#[stable(feature = "split_inclusive", since = "1.51.0")]
pub use iter::SplitInclusive;

#[unstable(feature = "split_rinclusive", issue = "none")]
pub use iter::SplitRInclusive;

#[unstable(feature = "str_internals", issue = "none")]
pub use validations::{next_code_point, utf8_char_width};

Expand Down Expand Up @@ -1230,6 +1233,7 @@ impl str {
end: self.len(),
matcher: pat.into_searcher(self),
allow_trailing_empty: true,
allow_leading_empty: true,
finished: false,
})
}
Expand Down Expand Up @@ -1270,6 +1274,58 @@ impl str {
end: self.len(),
matcher: pat.into_searcher(self),
allow_trailing_empty: false,
allow_leading_empty: true,
finished: false,
})
}

/// An iterator over substrings of this string slice, separated by
/// characters matched by a pattern. Differs from the iterator produced by
/// `split` in that `split_rinclusive` leaves the matched part as the
/// beginning of the next substring. The first substring is the exception:
/// it is whatever comes before the first match.
///
/// Put another way, a match is the start of a new substring.
///
/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
/// function or closure that determines if a character matches.
///
/// [`char`]: prim@char
/// [pattern]: self::pattern
///
/// # Examples
///
/// ```
/// #![feature(split_rinclusive)]
/// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
///     .split_rinclusive('\n').collect();
/// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb."]);
/// ```
///
/// If the first element of the string is matched,
/// the leading empty string is omitted.
///
/// ```
/// #![feature(split_rinclusive)]
/// let v: Vec<&str> = "MaryHadALittleLamb"
///     .split_rinclusive(char::is_uppercase).collect();
/// assert_eq!(v, ["Mary", "Had", "A", "Little", "Lamb"]);
/// ```
///
/// If the last element of the string is matched,
/// that element will be considered the final substring returned by the iterator.
///
/// ```
/// #![feature(split_rinclusive)]
/// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
///     .split_rinclusive('\n').collect();
/// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb.", "\n"]);
/// ```
#[unstable(feature = "split_rinclusive", issue = "none")]
#[inline]
pub fn split_rinclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitRInclusive<'a, P> {
    SplitRInclusive(SplitInternal {
        start: 0,
        end: self.len(),
        matcher: pat.into_searcher(self),
        // A match is never consumed, so the tail after the last match is
        // non-empty unless the haystack itself is empty.
        allow_trailing_empty: false,
        // Drop the empty piece that would precede a match at index 0.
        allow_leading_empty: false,
        finished: false,
    })
}
Expand Down