From 50f6fdb7eec01306078c09c0441d31ee113f2bba Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 25 Apr 2022 17:08:28 -0600 Subject: [PATCH 1/7] Extension collection --- .../Evolution/StringProcessingAlgorithms.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index eee05e3a7..a08302ddf 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -238,7 +238,7 @@ extension Collection where Element: Equatable { where S.Element == Element } -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { /// Returns a Boolean value indicating whether the collection contains the /// given regex. /// - Parameter regex: A regex to search for within this collection. @@ -262,7 +262,7 @@ extension BidirectionalCollection where SubSequence == Substring { #### Starts with ```swift -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { /// Returns a Boolean value indicating whether the initial elements of the /// sequence are the same as the elements in the specified regex. /// - Parameter regex: A regex to compare to this sequence. @@ -339,7 +339,7 @@ extension RangeReplaceableCollection where Element: Equatable { where Prefix.Element == Element } -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { /// Returns a new subsequence by removing the initial elements that matches /// the given regex. /// - Parameter regex: The regex to remove from this collection. 
@@ -404,7 +404,7 @@ extension BidirectionalCollection where Element: Comparable { where C.Element == Element } -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { /// Finds and returns the range of the first occurrence of a given regex /// within the collection. /// - Parameter regex: The regex to search for. @@ -438,7 +438,7 @@ extension Collection where Element: Equatable { where C.Element == Element } -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { /// Finds and returns the ranges of the all occurrences of a given sequence /// within the collection. /// - Parameter regex: The regex to search for. @@ -462,7 +462,7 @@ extension BidirectionalCollection where SubSequence == Substring { #### Match ```swift -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { /// Returns the first match of the specified regex within the collection. /// - Parameter regex: The regex to search for. /// - Returns: The first match of `regex` in the collection, or `nil` if @@ -512,7 +512,7 @@ extension BidirectionalCollection where SubSequence == Substring { #### Matches ```swift -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { /// Returns a collection containing all matches of the specified regex. /// - Parameter regex: The regex to search for. /// - Returns: A collection of matches of `regex`. @@ -814,7 +814,7 @@ extension Collection where Element: Equatable { ) -> some Collection where C.Element == Element } -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { /// Returns the longest possible subsequences of the collection, in order, /// around subsequence that match the given separator regex. 
/// From 2bd4ade16dddb5216df03900f191795d7649e033 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 25 Apr 2022 18:03:28 -0600 Subject: [PATCH 2/7] Break out long excerpts --- .../Evolution/StringProcessingAlgorithms.md | 414 ++++++++++++------ 1 file changed, 278 insertions(+), 136 deletions(-) diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index a08302ddf..0d1d1d756 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -31,7 +31,7 @@ while let r = str.range(of: "banana", options: [], range: idx.. @@ -133,13 +133,13 @@ Parsing a currency string such as `$3,020.85` with regex is also tricky, as it c ### Complex string processing We propose a `CustomConsumingRegexComponent` protocol which allows types from outside the standard library participate in regex builders and `RegexComponent` algorithms. This allows types, such as `Date.ParseStrategy` and `FloatingPointFormatStyle.Currency`, to be used directly within a regex: - + ```swift let dateRegex = Regex { Capture(dateParser) } -let date: Date = header.firstMatch(of: dateRegex).map(\.result.1) +let date: Date = header.firstMatch(of: dateRegex).map(\.result.1) let currencyRegex = Regex { Capture(.localizedCurrency(code: "USD").sign(strategy: .accounting)) @@ -174,7 +174,7 @@ We also propose the following regex-powered algorithms as well as their generic `CustomConsumingRegexComponent` inherits from `RegexComponent` and satisfies its sole requirement. Conformers can be used with all of the string algorithms generic over `RegexComponent`. ```swift -/// A protocol allowing custom types to function as regex components by +/// A protocol allowing custom types to function as regex components by /// providing the raw functionality backing `prefixMatch`. 
public protocol CustomConsumingRegexComponent: RegexComponent { /// Process the input string within the specified bounds, beginning at the given index, and return @@ -199,7 +199,7 @@ public protocol CustomConsumingRegexComponent: RegexComponent { We use Foundation `FloatingPointFormatStyle.Currency` as an example for protocol conformance. It would implement the `match` function with `Match` being a `Decimal`. It could also add a static function `.localizedCurrency(code:)` as a member of `RegexComponent`, so it can be referred as `.localizedCurrency(code:)` in the `Regex` result builder: ```swift -extension FloatingPointFormatStyle.Currency : CustomConsumingRegexComponent { +extension FloatingPointFormatStyle.Currency : CustomConsumingRegexComponent { public func consuming( _ input: String, startingAt index: String.Index, @@ -223,10 +223,12 @@ let regex = Regex { -### String algorithm additions +### String and Collection algorithm additions #### Contains +We propose a `contains` variant over collections that tests for subsequence membership. The second algorithm allows for specialization using e.g. the [two way search algorithm](https://en.wikipedia.org/wiki/Two-way_string-matching_algorithm). + ```swift extension Collection where Element: Equatable { /// Returns a Boolean value indicating whether the collection contains the @@ -237,7 +239,20 @@ extension Collection where Element: Equatable { public func contains(_ other: C) -> Bool where S.Element == Element } +extension BidirectionalCollection where Element: Comparable { + /// Returns a Boolean value indicating whether the collection contains the + /// given sequence. + /// - Parameter other: A sequence to search for within this collection. + /// - Returns: `true` if the collection contains the specified sequence, + /// otherwise `false`. 
+ public func contains(_ other: C) -> Bool + where S.Element == Element +} +``` +We propose a regex-taking variant over string types (those that produce a `Substring` upon slicing). + +```swift extension Collection where SubSequence == Substring { /// Returns a Boolean value indicating whether the collection contains the /// given regex. @@ -245,7 +260,10 @@ extension Collection where SubSequence == Substring { /// - Returns: `true` if the regex was found in the collection, otherwise /// `false`. public func contains(_ regex: some RegexComponent) -> Bool +} +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns a Boolean value indicating whether this collection contains a /// match for the regex, where the regex is created by the given closure. /// @@ -261,6 +279,8 @@ extension Collection where SubSequence == Substring { #### Starts with +We propose a regex-taking `starts(with:)` variant for string types: + ```swift extension Collection where SubSequence == Substring { /// Returns a Boolean value indicating whether the initial elements of the @@ -269,7 +289,10 @@ extension Collection where SubSequence == Substring { /// - Returns: `true` if the initial elements of the sequence matches the /// beginning of `regex`; otherwise, `false`. public func starts(with regex: some RegexComponent) -> Bool - +} + +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns a Boolean value indicating whether the initial elements of this /// collection are a match for the regex created by the given closure. /// @@ -285,6 +308,8 @@ extension Collection where SubSequence == Substring { #### Trim prefix +We propose generic `trimPrefix` for collections. 
+ ```swift extension Collection { /// Returns a new collection of the same type by removing initial elements @@ -338,7 +363,11 @@ extension RangeReplaceableCollection where Element: Equatable { public mutating func trimPrefix(_ prefix: Prefix) where Prefix.Element == Element } +``` +We propose regex-taking variants for string types: + +```swift extension Collection where SubSequence == Substring { /// Returns a new subsequence by removing the initial elements that matches /// the given regex. @@ -346,7 +375,10 @@ extension Collection where SubSequence == Substring { /// - Returns: A new subsequence containing the elements of the collection /// that does not match `prefix` from the start. public func trimmingPrefix(_ regex: some RegexComponent) -> SubSequence - +} + +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns a subsequence of this collection by removing the elements /// matching the regex from the start, where the regex is created by /// the given closure. @@ -362,13 +394,14 @@ extension Collection where SubSequence == Substring { ) -> SubSequence } -extension RangeReplaceableCollection - where Self: BidirectionalCollection, SubSequence == Substring -{ +extension RangeReplaceableCollection where SubSequence == Substring { /// Removes the initial elements that matches the given regex. /// - Parameter regex: The regex to remove from this collection. public mutating func trimPrefix(_ regex: some RegexComponent) - +} + +// In RegexBuilder module +extension RangeReplaceableCollection where SubSequence == Substring { /// Removes the initial elements matching the regex from the start of /// this collection, if the initial elements match, using the given closure /// to create the regex. 
@@ -383,6 +416,8 @@ extension RangeReplaceableCollection #### First range +We propose a generic collection algorithm for finding the first range of a given subsequence: + ```swift extension Collection where Element: Equatable { /// Finds and returns the range of the first occurrence of a given sequence @@ -390,7 +425,7 @@ extension Collection where Element: Equatable { /// - Parameter sequence: The sequence to search for. /// - Returns: A range in the collection of the first occurrence of `sequence`. /// Returns nil if `sequence` is not found. - public func firstRange(of other: C) -> Range? + public func firstRange(of other: C) -> Range? where C.Element == Element } @@ -403,6 +438,11 @@ extension BidirectionalCollection where Element: Comparable { public func firstRange(of other: C) -> Range? where C.Element == Element } +``` + +We propose a regex-taking variant for string types. + +```swift extension Collection where SubSequence == Substring { /// Finds and returns the range of the first occurrence of a given regex @@ -411,7 +451,10 @@ extension Collection where SubSequence == Substring { /// - Returns: A range in the collection of the first occurrence of `regex`. /// Returns `nil` if `regex` is not found. public func firstRange(of regex: some RegexComponent) -> Range? - +} + +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns the range of the first match for the regex within this collection, /// where the regex is created by the given closure. /// @@ -421,12 +464,14 @@ extension Collection where SubSequence == Substring { /// for the regex is found. public func firstRange( @RegexComponentBuilder of content: () -> some RegexComponent - ) -> Range? + ) -> Range? } ``` #### Ranges +We propose a generic collection algorithm for iterating over all (non-overlapping) ranges of a given subsequence. 
+ ```swift extension Collection where Element: Equatable { /// Finds and returns the ranges of the all occurrences of a given sequence @@ -438,6 +483,20 @@ extension Collection where Element: Equatable { where C.Element == Element } +extension BidirectionalCollection where Element: Comparable { + /// Finds and returns the ranges of the all occurrences of a given sequence + /// within the collection. + /// - Parameter other: The sequence to search for. + /// - Returns: A collection of ranges of all occurrences of `other`. Returns + /// an empty collection if `other` is not found. + public func ranges(of other: C) -> some Collection> + where C.Element == Element +} +``` + +And of course regex-taking versions for string types: + +```swift extension Collection where SubSequence == Substring { /// Finds and returns the ranges of the all occurrences of a given sequence /// within the collection. @@ -445,7 +504,10 @@ extension Collection where SubSequence == Substring { /// - Returns: A collection or ranges in the receiver of all occurrences of /// `regex`. Returns an empty collection if `regex` is not found. public func ranges(of regex: some RegexComponent) -> some Collection> - +} + +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns the ranges of the all non-overlapping matches for the regex /// within this collection, where the regex is created by the given closure. /// @@ -461,6 +523,8 @@ extension Collection where SubSequence == Substring { #### Match +We propose algorithms for extracting a `Match` instance from a given regex from the start, anywhere in the middle, or over the entire `self`. + ```swift extension Collection where SubSequence == Substring { /// Returns the first match of the specified regex within the collection. @@ -468,7 +532,20 @@ extension Collection where SubSequence == Substring { /// - Returns: The first match of `regex` in the collection, or `nil` if /// there isn't a match. 
public func firstMatch(of regex: R) -> Regex.Match? - + + /// Match a regex in its entirety. + /// - Parameter regex: The regex to match against. + /// - Returns: The match if there is one, or `nil` if none. + public func wholeMatch(of regex: R) -> Regex.Match? + + /// Match part of the regex, starting at the beginning. + /// - Parameter regex: The regex to match against. + /// - Returns: The match if there is one, or `nil` if none. + public func prefixMatch(of regex: R) -> Regex.Match? +} + +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns the first match for the regex within this collection, where /// the regex is created by the given closure. /// @@ -477,13 +554,8 @@ extension Collection where SubSequence == Substring { /// collection, or `nil` if no match is found. public func firstMatch( @RegexComponentBuilder of content: () -> R - ) -> Regex.Match? - - /// Match a regex in its entirety. - /// - Parameter regex: The regex to match against. - /// - Returns: The match if there is one, or `nil` if none. - public func wholeMatch(of regex: R) -> Regex.Match? - + ) -> Regex.Match? + /// Matches a regex in its entirety, where the regex is created by /// the given closure. /// @@ -491,13 +563,8 @@ extension Collection where SubSequence == Substring { /// - Returns: The match if there is one, or `nil` if none. public func wholeMatch( @RegexComponentBuilder of content: () -> R - ) -> Regex.Match? - - /// Match part of the regex, starting at the beginning. - /// - Parameter regex: The regex to match against. - /// - Returns: The match if there is one, or `nil` if none. - public func prefixMatch(of regex: R) -> Regex.Match? - + ) -> Regex.Match? + /// Matches part of the regex, starting at the beginning, where the regex /// is created by the given closure. 
/// @@ -511,13 +578,18 @@ extension Collection where SubSequence == Substring { #### Matches +We propose an algorithm for iterating over all (non-overlapping) matches of a given regex: + ```swift extension Collection where SubSequence == Substring { /// Returns a collection containing all matches of the specified regex. /// - Parameter regex: The regex to search for. /// - Returns: A collection of matches of `regex`. public func matches(of regex: R) -> some Collection.Match> - +} + +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns a collection containing all non-overlapping matches of /// the regex, created by the given closure. /// @@ -532,6 +604,8 @@ extension Collection where SubSequence == Substring { #### Replace +We propose generic collection algorithms that will replace all occurrences of a given subsequence: + ```swift extension RangeReplaceableCollection where Element: Equatable { /// Returns a new collection in which all occurrences of a target sequence @@ -543,14 +617,14 @@ extension RangeReplaceableCollection where Element: Equatable { /// - maxReplacements: A number specifying how many occurrences of `other` /// to replace. Default is `Int.max`. /// - Returns: A new collection in which all occurrences of `other` in - /// `subrange` of the collection are replaced by `replacement`. + /// `subrange` of the collection are replaced by `replacement`. public func replacing( _ other: C, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max ) -> Self where C.Element == Element, Replacement.Element == Element - + /// Returns a new collection in which all occurrences of a target sequence /// are replaced by another collection. 
/// - Parameters: @@ -565,7 +639,7 @@ extension RangeReplaceableCollection where Element: Equatable { with replacement: Replacement, maxReplacements: Int = .max ) -> Self where C.Element == Element, Replacement.Element == Element - + /// Replaces all occurrences of a target sequence with a given collection /// - Parameters: /// - other: The sequence to replace. @@ -578,7 +652,56 @@ extension RangeReplaceableCollection where Element: Equatable { maxReplacements: Int = .max ) where C.Element == Element, Replacement.Element == Element } +extension RangeReplaceableCollection where Self: BidirectionalCollection, Element: Comparable { + /// Returns a new collection in which all occurrences of a target sequence + /// are replaced by another collection. + /// - Parameters: + /// - other: The sequence to replace. + /// - replacement: The new elements to add to the collection. + /// - subrange: The range in the collection in which to search for `other`. + /// - maxReplacements: A number specifying how many occurrences of `other` + /// to replace. Default is `Int.max`. + /// - Returns: A new collection in which all occurrences of `other` in + /// `subrange` of the collection are replaced by `replacement`. + public func replacing( + _ other: C, + with replacement: Replacement, + subrange: Range, + maxReplacements: Int = .max + ) -> Self where C.Element == Element, Replacement.Element == Element + /// Returns a new collection in which all occurrences of a target sequence + /// are replaced by another collection. + /// - Parameters: + /// - other: The sequence to replace. + /// - replacement: The new elements to add to the collection. + /// - maxReplacements: A number specifying how many occurrences of `other` + /// to replace. Default is `Int.max`. + /// - Returns: A new collection in which all occurrences of `other` in + /// `subrange` of the collection are replaced by `replacement`. 
+ public func replacing( + _ other: C, + with replacement: Replacement, + maxReplacements: Int = .max + ) -> Self where C.Element == Element, Replacement.Element == Element + + /// Replaces all occurrences of a target sequence with a given collection + /// - Parameters: + /// - other: The sequence to replace. + /// - replacement: The new elements to add to the collection. + /// - maxReplacements: A number specifying how many occurrences of `other` + /// to replace. Default is `Int.max`. + public mutating func replace( + _ other: C, + with replacement: Replacement, + maxReplacements: Int = .max + ) where C.Element == Element, Replacement.Element == Element +} +``` + +We propose regex-taking variants for string types as well as variants that take a closure which will generate the replacement portion from a regex match (e.g. by reading captures). + +```swift extension RangeReplaceableCollection where SubSequence == Substring { /// Returns a new collection in which all occurrences of a sequence matching /// the given regex are replaced by another collection. @@ -597,6 +720,85 @@ extension RangeReplaceableCollection where SubSequence == Substring { maxReplacements: Int = .max ) -> Self where Replacement.Element == Element + /// Returns a new collection in which all occurrences of a sequence matching + /// the given regex are replaced by another collection. + /// - Parameters: + /// - regex: A regex describing the sequence to replace. + /// - replacement: The new elements to add to the collection. + /// - maxReplacements: A number specifying how many occurrences of the + /// sequence matching `regex` to replace. Default is `Int.max`. + /// - Returns: A new collection in which all occurrences of subsequence + /// matching `regex` are replaced by `replacement`. 
+ public func replacing( + _ r: some RegexComponent, + with replacement: Replacement, + maxReplacements: Int = .max + ) -> Self where Replacement.Element == Element + + /// Replaces all occurrences of the sequence matching the given regex with + /// a given collection. + /// - Parameters: + /// - regex: A regex describing the sequence to replace. + /// - replacement: The new elements to add to the collection. + /// - maxReplacements: A number specifying how many occurrences of the + /// sequence matching `regex` to replace. Default is `Int.max`. + public mutating func replace( + _ r: some RegexComponent, + with replacement: Replacement, + maxReplacements: Int = .max + ) where Replacement.Element == Element + + /// Returns a new collection in which all occurrences of a sequence matching + /// the given regex are replaced by another regex match. + /// - Parameters: + /// - regex: A regex describing the sequence to replace. + /// - subrange: The range in the collection in which to search for `regex`. + /// - maxReplacements: A number specifying how many occurrences of the + /// sequence matching `regex` to replace. Default is `Int.max`. + /// - replacement: A closure that receives the full match information, + /// including captures, and returns a replacement collection. + /// - Returns: A new collection in which all occurrences of subsequence + /// matching `regex` are replaced by `replacement`. + public func replacing( + _ regex: R, + subrange: Range, + maxReplacements: Int = .max, + with replacement: (Regex.Match) throws -> Replacement + ) rethrows -> Self where Replacement.Element == Element + + /// Returns a new collection in which all occurrences of a sequence matching + /// the given regex are replaced by another collection. + /// - Parameters: + /// - regex: A regex describing the sequence to replace. + /// - maxReplacements: A number specifying how many occurrences of the + /// sequence matching `regex` to replace. Default is `Int.max`. 
+ /// - replacement: A closure that receives the full match information, + /// including captures, and returns a replacement collection. + /// - Returns: A new collection in which all occurrences of subsequence + /// matching `regex` are replaced by `replacement`. + public func replacing( + _ regex: R, + maxReplacements: Int = .max, + with replacement: (Regex.Match) throws -> Replacement + ) rethrows -> Self where Replacement.Element == Element + + /// Replaces all occurrences of the sequence matching the given regex with + /// a given collection. + /// - Parameters: + /// - regex: A regex describing the sequence to replace. + /// - maxReplacements: A number specifying how many occurrences of the + /// sequence matching `regex` to replace. Default is `Int.max`. + /// - replacement: A closure that receives the full match information, + /// including captures, and returns a replacement collection. + public mutating func replace( + _ regex: R, + maxReplacements: Int = .max, + with replacement: (Regex.Match) throws -> Replacement + ) rethrows where Replacement.Element == Element +} + +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns a new collection in which all matches for the regex /// are replaced, using the given closure to create the regex. /// @@ -610,29 +812,14 @@ extension RangeReplaceableCollection where SubSequence == Substring { /// - content: A closure that returns the collection to search for /// and replace. /// - Returns: A new collection in which all matches for regex in `subrange` - /// are replaced by `replacement`, using `content` to create the regex. + /// are replaced by `replacement`, using `content` to create the regex. 
public func replacing( with replacement: Replacement, subrange: Range, maxReplacements: Int = .max, @RegexComponentBuilder content: () -> some RegexComponent ) -> Self where Replacement.Element == Element - - /// Returns a new collection in which all occurrences of a sequence matching - /// the given regex are replaced by another collection. - /// - Parameters: - /// - regex: A regex describing the sequence to replace. - /// - replacement: The new elements to add to the collection. - /// - maxReplacements: A number specifying how many occurrences of the - /// sequence matching `regex` to replace. Default is `Int.max`. - /// - Returns: A new collection in which all occurrences of subsequence - /// matching `regex` are replaced by `replacement`. - public func replacing( - _ r: some RegexComponent, - with replacement: Replacement, - maxReplacements: Int = .max - ) -> Self where Replacement.Element == Element - + /// Returns a new collection in which all matches for the regex /// are replaced, using the given closure to create the regex. /// @@ -649,21 +836,8 @@ extension RangeReplaceableCollection where SubSequence == Substring { with replacement: Replacement, maxReplacements: Int = .max, @RegexComponentBuilder content: () -> some RegexComponent - ) -> Self where Replacement.Element == Element - - /// Replaces all occurrences of the sequence matching the given regex with - /// a given collection. - /// - Parameters: - /// - regex: A regex describing the sequence to replace. - /// - replacement: The new elements to add to the collection. - /// - maxReplacements: A number specifying how many occurrences of the - /// sequence matching `regex` to replace. Default is `Int.max`. 
- public mutating func replace( - _ r: some RegexComponent, - with replacement: Replacement, - maxReplacements: Int = .max - ) where Replacement.Element == Element - + ) -> Self where Replacement.Element == Element + /// Replaces all matches for the regex in this collection, using the given /// closure to create the regex. /// @@ -678,25 +852,7 @@ extension RangeReplaceableCollection where SubSequence == Substring { with replacement: Replacement, maxReplacements: Int = .max, @RegexComponentBuilder content: () -> some RegexComponent - ) where Replacement.Element == Element - - /// Returns a new collection in which all occurrences of a sequence matching - /// the given regex are replaced by another regex match. - /// - Parameters: - /// - regex: A regex describing the sequence to replace. - /// - subrange: The range in the collection in which to search for `regex`. - /// - maxReplacements: A number specifying how many occurrences of the - /// sequence matching `regex` to replace. Default is `Int.max`. - /// - replacement: A closure that receives the full match information, - /// including captures, and returns a replacement collection. - /// - Returns: A new collection in which all occurrences of subsequence - /// matching `regex` are replaced by `replacement`. 
- public func replacing( - _ regex: R, - subrange: Range, - maxReplacements: Int = .max, - with replacement: (Regex.Match) throws -> Replacement - ) rethrows -> Self where Replacement.Element == Element + ) where Replacement.Element == Element /// Returns a new collection in which all matches for the regex /// are replaced, using the given closures to create the replacement @@ -720,23 +876,7 @@ extension RangeReplaceableCollection where SubSequence == Substring { @RegexComponentBuilder content: () -> R, with replacement: (Regex.Match) throws -> Replacement ) rethrows -> Self where Replacement.Element == Element - - /// Returns a new collection in which all occurrences of a sequence matching - /// the given regex are replaced by another collection. - /// - Parameters: - /// - regex: A regex describing the sequence to replace. - /// - maxReplacements: A number specifying how many occurrences of the - /// sequence matching `regex` to replace. Default is `Int.max`. - /// - replacement: A closure that receives the full match information, - /// including captures, and returns a replacement collection. - /// - Returns: A new collection in which all occurrences of subsequence - /// matching `regex` are replaced by `replacement`. - public func replacing( - _ regex: R, - maxReplacements: Int = .max, - with replacement: (Regex.Match) throws -> Replacement - ) rethrows -> Self where Replacement.Element == Element - + /// Returns a new collection in which all matches for the regex /// are replaced, using the given closures to create the replacement /// and the regex. @@ -756,20 +896,6 @@ extension RangeReplaceableCollection where SubSequence == Substring { @RegexComponentBuilder content: () -> R, with replacement: (Regex.Match) throws -> Replacement ) rethrows -> Self where Replacement.Element == Element - - /// Replaces all occurrences of the sequence matching the given regex with - /// a given collection. 
- /// - Parameters: - /// - regex: A regex describing the sequence to replace. - /// - maxReplacements: A number specifying how many occurrences of the - /// sequence matching `regex` to replace. Default is `Int.max`. - /// - replacement: A closure that receives the full match information, - /// including captures, and returns a replacement collection. - public mutating func replace( - _ regex: R, - maxReplacements: Int = .max, - with replacement: (Regex.Match) throws -> Replacement - ) rethrows where Replacement.Element == Element /// Replaces all matches for the regex in this collection, using the /// given closures to create the replacement and the regex. @@ -791,6 +917,8 @@ extension RangeReplaceableCollection where SubSequence == Substring { #### Split +We propose a generic collection `split` that can take a subsequence separator: + ```swift extension Collection where Element: Equatable { /// Returns the longest possible subsequences of the collection, in order, @@ -798,11 +926,11 @@ extension Collection where Element: Equatable { /// /// - Parameters: /// - separator: A collection of elements to be split upon. - /// - maxSplits: The maximum number of times to split the collection, + /// - maxSplits: The maximum number of times to split the collection, /// or one less than the number of subsequences to return. - /// - omittingEmptySubsequences: If `false`, an empty subsequence is - /// returned in the result for each consecutive pair of separator - /// sequences in the collection and for each instance of separator + /// - omittingEmptySubsequences: If `false`, an empty subsequence is + /// returned in the result for each consecutive pair of separator + /// sequences in the collection and for each instance of separator /// sequences at the start or end of the collection. If `true`, only /// nonempty subsequences are returned. 
/// - Returns: A collection of subsequences, split from this collection's @@ -813,6 +941,11 @@ extension Collection where Element: Equatable { omittingEmptySubsequences: Bool = true ) -> some Collection where C.Element == Element } +``` + +And a regex-taking variant for string types: + +```swift extension Collection where SubSequence == Substring { /// Returns the longest possible subsequences of the collection, in order, @@ -820,9 +953,9 @@ extension Collection where SubSequence == Substring { /// /// - Parameters: /// - separator: A regex to be split upon. - /// - maxSplits: The maximum number of times to split the collection, + /// - maxSplits: The maximum number of times to split the collection, /// or one less than the number of subsequences to return. - /// - omittingEmptySubsequences: If `false`, an empty subsequence is + /// - omittingEmptySubsequences: If `false`, an empty subsequence is /// returned in the result for each consecutive pair of matches /// and for each match at the start or end of the collection. If /// `true`, only nonempty subsequences are returned. @@ -833,7 +966,10 @@ extension Collection where SubSequence == Substring { maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true ) -> some Collection - +} + +// In RegexBuilder module +extension Collection where SubSequence == Substring { /// Returns the longest possible subsequences of the collection, in order, /// around subsequence that match the regex created by the given closure. /// @@ -918,24 +1054,30 @@ Older versions of the pitch had `func match(...) -> (String.Index, T)?` as the p This protocol customizes the basic consume-from-the-front functionality. A protocol for customizing search is future work and involves accommodating different kinds of state and ways that a searcher may wish to speed up subsequent searches. Alternative names for the protocol include `CustomRegexComponent`, `CustomConsumingRegex`, etc., but we don't feel brevity is the key consideration here. 
+### Why `where SubSequence == Substring` + +A `Substring` slice requirement allows the regex engine to produce indices in the original collection by operating over a portion of the input. Unfortunately, this is not one of the requirements of `StringProtocol`. + +A new protocol for types that can produce a `Substring` on request (e.g. from UTF-8 contents) would have to eagerly produce a `String` copy first and would need requirements to translate indices. When higher-level algorithms are implemented via multiple calls to the lower-level algorithms, these copies could happen many times. Shared strings are future work but a much better solution to this. + ## Future directions ### Backward algorithms -It would be useful to have algorithms that operate from the back of a collection, including ability to find the last non-overlapping range of a pattern in a string, and/or that to find the first range of a pattern when searching from the back, and trimming a string from both sides. They are deferred from this proposal as the API that could clarify the nuances of backward algorithms are still being explored. +It would be useful to have algorithms that operate from the back of a collection, including the ability to find the last non-overlapping range of a pattern in a string, and/or to find the first range of a pattern when searching from the back, and trimming a string from both sides. They are deferred from this proposal as the API that could clarify the nuances of backward algorithms is still being explored.
Nuances of backward algorithms -There is a subtle difference between finding the last non-overlapping range of a pattern in a string, and finding the first range of this pattern when searching from the back. +There is a subtle difference between finding the last non-overlapping range of a pattern in a string, and finding the first range of this pattern when searching from the back. -The currently proposed algorithm that finds a pattern from the front, e.g. `"aaaaa".ranges(of: "aa")`, produces two non-overlapping ranges, splitting the string in the chunks `aa|aa|a`. It would not be completely unreasonable to expect to introduce a counterpart, such as `"aaaaa".lastRange(of: "aa")`, to return the range that contains the third and fourth characters of the string. This would be a shorthand for `"aaaaa".ranges(of: "aa").last`. Yet, it would also be reasonable to expect the function to return the first range of `"aa"` when searching from the back of the string, i.e. the range that contains the fourth and fifth characters. +The currently proposed algorithm that finds a pattern from the front, e.g. `"aaaaa".ranges(of: "aa")`, produces two non-overlapping ranges, splitting the string in the chunks `aa|aa|a`. It would not be completely unreasonable to expect to introduce a counterpart, such as `"aaaaa".lastRange(of: "aa")`, to return the range that contains the third and fourth characters of the string. This would be a shorthand for `"aaaaa".ranges(of: "aa").last`. Yet, it would also be reasonable to expect the function to return the first range of `"aa"` when searching from the back of the string, i.e. the range that contains the fourth and fifth characters. -Trimming a string from both sides shares a similar story. For example, `"ababa".trimming("aba")` can return either `"ba"` or `"ab"`, depending on whether the prefix or the suffix was trimmed first. +Trimming a string from both sides shares a similar story. 
For example, `"ababa".trimming("aba")` can return either `"ba"` or `"ab"`, depending on whether the prefix or the suffix was trimmed first.
- + ### Future API -Some common string processing functions are not currently included in this proposal, such as trimming the suffix from a string/collection, and finding overlapping ranges of matched substrings. This pitch aims to establish a pattern for using `RegexComponent` with string processing algorithms, so that further enhancement can to be introduced to the standard library easily in the future, and eventually close the gap between Swift and other popular scripting languages. +Some common string processing functions are not currently included in this proposal, such as trimming the suffix from a string/collection, and finding overlapping ranges of matched substrings. This pitch aims to establish a pattern for using `RegexComponent` with string processing algorithms, so that further enhancements can be introduced to the standard library easily in the future, and eventually close the gap between Swift and other popular scripting languages. From e22149dcfa19c70a096cd22894c10827573b7275 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 25 Apr 2022 18:09:00 -0600 Subject: [PATCH 3/7] cleanup --- Documentation/Evolution/StringProcessingAlgorithms.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index 0d1d1d756..14b518d13 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -308,7 +308,7 @@ extension Collection where SubSequence == Substring { #### Trim prefix -We propose generic `trimPrefix` for collections. +We propose generic `trimPrefix` for collections taking a `Element` predicate or a specific `Element`. ```swift extension Collection { @@ -443,7 +443,6 @@ extension BidirectionalCollection where Element: Comparable { We propose a regex-taking variant for string types. 
```swift - extension Collection where SubSequence == Substring { /// Finds and returns the range of the first occurrence of a given regex /// within the collection. @@ -946,7 +945,6 @@ extension Collection where Element: Equatable { And a regex-taking variant for string types: ```swift - extension Collection where SubSequence == Substring { /// Returns the longest possible subsequences of the collection, in order, /// around subsequence that match the given separator regex. From 10f03ae5ed960466093fb1ca125bcbc7a2e86598 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 25 Apr 2022 18:10:13 -0600 Subject: [PATCH 4/7] bidi split --- .../Evolution/StringProcessingAlgorithms.md | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index 14b518d13..1d9671588 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -940,6 +940,27 @@ extension Collection where Element: Equatable { omittingEmptySubsequences: Bool = true ) -> some Collection where C.Element == Element } +extension BidirectionalCollection where Element: Comparable { + /// Returns the longest possible subsequences of the collection, in order, + /// around elements equal to the given separator collection. + /// + /// - Parameters: + /// - separator: A collection of elements to be split upon. + /// - maxSplits: The maximum number of times to split the collection, + /// or one less than the number of subsequences to return. + /// - omittingEmptySubsequences: If `false`, an empty subsequence is + /// returned in the result for each consecutive pair of separator + /// sequences in the collection and for each instance of separator + /// sequences at the start or end of the collection. If `true`, only + /// nonempty subsequences are returned. 
+ /// - Returns: A collection of subsequences, split from this collection's + /// elements. + public func split( + separator: C, + maxSplits: Int = Int.max, + omittingEmptySubsequences: Bool = true + ) -> some Collection where C.Element == Element +} ``` And a regex-taking variant for string types: From 654d69909d0504ee6bfde122717ac01a4bcaead4 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 25 Apr 2022 18:11:35 -0600 Subject: [PATCH 5/7] mystery mark --- Documentation/Evolution/StringProcessingAlgorithms.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index 1d9671588..1233c0c31 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -1073,7 +1073,7 @@ Older versions of the pitch had `func match(...) -> (String.Index, T)?` as the p This protocol customizes the basic consume-from-the-front functionality. A protocol for customizing search is future work and involves accommodating different kinds of state and ways that a searcher may wish to speed up subsequent searches. Alternative names for the protocol include `CustomRegexComponent`, `CustomConsumingRegex`, etc., but we don't feel brevity is the key consideration here. -### Why `where SubSequence == Substring` +### Why `where SubSequence == Substring`? A `Substring` slice requirement allows the regex engine to produce indices in the original collection by operating over a portion of the input. Unfortunately, this is not one of the requirements of `StringProtocol`. 
From 3f47e6da47a2e47dbb25331adbd731b46aa23bd6 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 25 Apr 2022 18:13:34 -0600 Subject: [PATCH 6/7] Update Documentation/Evolution/StringProcessingAlgorithms.md Co-authored-by: Nate Cook --- Documentation/Evolution/StringProcessingAlgorithms.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index 1233c0c31..81284e86c 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -308,7 +308,7 @@ extension Collection where SubSequence == Substring { #### Trim prefix -We propose generic `trimPrefix` for collections taking a `Element` predicate or a specific `Element`. +We propose generic `trimmingPrefix` and `trimPrefix` methods for collections that trim elements matching a predicate or a possible prefix sequence. ```swift extension Collection { From a9af7efdf46969aebfcda8ce56c6eba7da7a241c Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 25 Apr 2022 18:32:33 -0600 Subject: [PATCH 7/7] split preserving --- Documentation/Evolution/StringProcessingAlgorithms.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index 81284e86c..001ce1fec 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -1096,6 +1096,9 @@ The currently proposed algorithm that finds a pattern from the front, e.g. `"aaa Trimming a string from both sides shares a similar story. For example, `"ababa".trimming("aba")` can return either `"ba"` or `"ab"`, depending on whether the prefix or the suffix was trimmed first. +### Split preserving the separator + +Future work is a split variant that interweaves the separator with the separated portions. 
For example, when splitting over `\p{punctuation}` it might be useful to be able to preserve the punctuation as a separate entry in the returned collection. ### Future API