From d7599962caf775d505987e84eab86babb726633d Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Fri, 22 Apr 2022 16:24:36 -0500 Subject: [PATCH 1/8] Remove @testable annotations where possible --- Tests/RegexBuilderTests/RegexDSLTests.swift | 2 +- .../RegexTests/AlgorithmsInternalsTests.swift | 47 +++++++++++++++++++ Tests/RegexTests/AlgorithmsTests.swift | 33 +------------ Tests/RegexTests/AllScalarsTests.swift | 2 +- Tests/RegexTests/CompileTests.swift | 1 - 5 files changed, 50 insertions(+), 35 deletions(-) create mode 100644 Tests/RegexTests/AlgorithmsInternalsTests.swift diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index cd1c94657..8c6972481 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -11,7 +11,7 @@ import XCTest import _StringProcessing -@testable import RegexBuilder +import RegexBuilder class RegexDSLTests: XCTestCase { func _testDSLCaptures( diff --git a/Tests/RegexTests/AlgorithmsInternalsTests.swift b/Tests/RegexTests/AlgorithmsInternalsTests.swift new file mode 100644 index 000000000..f0d556744 --- /dev/null +++ b/Tests/RegexTests/AlgorithmsInternalsTests.swift @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +@testable import _StringProcessing +import XCTest + +// TODO: Protocol-powered testing +extension AlgorithmTests { + func testAdHoc() { + let r = try! Regex("a|b+") + + XCTAssert("palindrome".contains(r)) + XCTAssert("botany".contains(r)) + XCTAssert("antiquing".contains(r)) + XCTAssertFalse("cdef".contains(r)) + + let str = "a string with the letter b in it" + let first = str.firstRange(of: r) + let last = str.lastRange(of: r) + let (expectFirst, expectLast) = ( + str.index(atOffset: 0).. Date: Fri, 22 Apr 2022 19:05:50 -0500 Subject: [PATCH 2/8] Switch FixedPatternConsumer to be over Sequence --- .../Consumers/FixedPatternConsumer.swift | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Consumers/FixedPatternConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/FixedPatternConsumer.swift index e611f477a..8312c247a 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/FixedPatternConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/FixedPatternConsumer.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -struct FixedPatternConsumer +struct FixedPatternConsumer where Consumed.Element: Equatable, Pattern.Element == Consumed.Element { let pattern: Pattern @@ -21,20 +21,17 @@ extension FixedPatternConsumer: CollectionConsumer { in range: Range ) -> Consumed.Index? { var index = range.lowerBound - var patternIndex = pattern.startIndex + var patternIterator = pattern.makeIterator() - while true { - if patternIndex == pattern.endIndex { - return index - } - - if index == range.upperBound || consumed[index] != pattern[patternIndex] { + while let element = patternIterator.next() { + if index == range.upperBound || consumed[index] != element { return nil } consumed.formIndex(after: &index) - pattern.formIndex(after: &patternIndex) } + + return index } } From 08797a398e30248d275208fb268c1d3f12a560a9 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Fri, 22 Apr 2022 19:19:08 -0500 Subject: [PATCH 3/8] Update Sequence/Collection constraints --- .../Algorithms/Algorithms/Contains.swift | 8 ++--- .../Algorithms/Algorithms/FirstRange.swift | 28 ++++++++------- .../Algorithms/Algorithms/Ranges.swift | 19 +++++----- .../Algorithms/Algorithms/Replace.swift | 36 +++++++++---------- .../Algorithms/Algorithms/Split.swift | 6 ++-- .../Algorithms/Algorithms/Trim.swift | 6 ++-- 6 files changed, 53 insertions(+), 50 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift index 1d4332ad0..96414423a 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift @@ -28,16 +28,16 @@ extension Collection where Element: Equatable { /// - Returns: `true` if the collection contains the specified sequence, /// otherwise `false`. @available(SwiftStdlib 5.7, *) - public func contains(_ other: S) -> Bool - where S.Element == Element + public func contains(_ other: C) -> Bool + where C.Element == Element { firstRange(of: other) != nil } } extension BidirectionalCollection where Element: Comparable { - func contains(_ other: S) -> Bool - where S.Element == Element + func contains(_ other: C) -> Bool + where C.Element == Element { if #available(SwiftStdlib 5.7, *) { return firstRange(of: other) != nil diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift index 508c04663..42703827e 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift @@ -32,31 +32,33 @@ extension BidirectionalCollection { // MARK: Fixed pattern algorithms extension Collection where Element: Equatable { - /// Finds and returns the range of the first occurrence of a given sequence - /// within the collection. - /// - Parameter sequence: The sequence to search for. + /// Finds and returns the range of the first occurrence of a given collection + /// within this collection. + /// + /// - Parameter other: The collection to search for. /// - Returns: A range in the collection of the first occurrence of `sequence`. /// Returns nil if `sequence` is not found. @available(SwiftStdlib 5.7, *) - public func firstRange( - of sequence: S - ) -> Range? where S.Element == Element { + public func firstRange( + of other: C + ) -> Range? where C.Element == Element { // TODO: Use a more efficient search algorithm - let searcher = ZSearcher(pattern: Array(sequence), by: ==) + let searcher = ZSearcher(pattern: Array(other), by: ==) return searcher.search(self[...], in: startIndex..( - of other: S - ) -> Range? where S.Element == Element { + public func firstRange( + of other: C + ) -> Range? where C.Element == Element { let searcher = PatternOrEmpty( searcher: TwoWaySearcher(pattern: Array(other))) let slice = self[...] diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 853c73271..33a9748ac 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -175,9 +175,9 @@ extension BidirectionalCollection { // MARK: Fixed pattern algorithms extension Collection where Element: Equatable { - func ranges( - of other: S - ) -> RangesCollection> where S.Element == Element { + func ranges( + of other: C + ) -> RangesCollection> where C.Element == Element { ranges(of: ZSearcher(pattern: Array(other), by: ==)) } @@ -188,9 +188,9 @@ extension Collection where Element: Equatable { /// - Returns: A collection of ranges of all occurrences of `other`. Returns /// an empty collection if `other` is not found. @available(SwiftStdlib 5.7, *) - public func ranges( - of other: S - ) -> [Range] where S.Element == Element { + public func ranges( + of other: C + ) -> [Range] where C.Element == Element { ranges(of: ZSearcher(pattern: Array(other), by: ==)).map { $0 } } } @@ -207,10 +207,10 @@ extension BidirectionalCollection where Element: Equatable { } extension BidirectionalCollection where Element: Comparable { - func ranges( - of other: S + func ranges( + of other: C ) -> RangesCollection>> - where S.Element == Element + where C.Element == Element { ranges(of: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(other)))) } @@ -247,6 +247,7 @@ extension BidirectionalCollection where SubSequence == Substring { // FIXME: Return `some Collection>` for SE-0346 /// Finds and returns the ranges of the all occurrences of a given sequence /// within the collection. + /// /// - Parameter regex: The regex to search for. /// - Returns: A collection or ranges in the receiver of all occurrences of /// `regex`. Returns an empty collection if `regex` is not found. diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index 4a6da6c10..217fb90d6 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -78,12 +78,12 @@ extension RangeReplaceableCollection where Element: Equatable { /// - Returns: A new collection in which all occurrences of `other` in /// `subrange` of the collection are replaced by `replacement`. @available(SwiftStdlib 5.7, *) - public func replacing( - _ other: S, + public func replacing( + _ other: C, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max - ) -> Self where S.Element == Element, Replacement.Element == Element { + ) -> Self where C.Element == Element, Replacement.Element == Element { replacing( ZSearcher(pattern: Array(other), by: ==), with: replacement, @@ -101,11 +101,11 @@ extension RangeReplaceableCollection where Element: Equatable { /// - Returns: A new collection in which all occurrences of `other` in /// `subrange` of the collection are replaced by `replacement`. @available(SwiftStdlib 5.7, *) - public func replacing( - _ other: S, + public func replacing( + _ other: C, with replacement: Replacement, maxReplacements: Int = .max - ) -> Self where S.Element == Element, Replacement.Element == Element { + ) -> Self where C.Element == Element, Replacement.Element == Element { replacing( other, with: replacement, @@ -120,11 +120,11 @@ extension RangeReplaceableCollection where Element: Equatable { /// - maxReplacements: A number specifying how many occurrences of `other` /// to replace. Default is `Int.max`. @available(SwiftStdlib 5.7, *) - public mutating func replace( - _ other: S, + public mutating func replace( + _ other: C, with replacement: Replacement, maxReplacements: Int = .max - ) where S.Element == Element, Replacement.Element == Element { + ) where C.Element == Element, Replacement.Element == Element { self = replacing( other, with: replacement, @@ -136,12 +136,12 @@ extension RangeReplaceableCollection where Element: Equatable { extension RangeReplaceableCollection where Self: BidirectionalCollection, Element: Comparable { - func replacing( - _ other: S, + func replacing( + _ other: C, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max - ) -> Self where S.Element == Element, Replacement.Element == Element { + ) -> Self where C.Element == Element, Replacement.Element == Element { replacing( PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(other))), with: replacement, @@ -149,11 +149,11 @@ extension RangeReplaceableCollection maxReplacements: maxReplacements) } - func replacing( - _ other: S, + func replacing( + _ other: C, with replacement: Replacement, maxReplacements: Int = .max - ) -> Self where S.Element == Element, Replacement.Element == Element { + ) -> Self where C.Element == Element, Replacement.Element == Element { replacing( other, with: replacement, @@ -161,11 +161,11 @@ extension RangeReplaceableCollection maxReplacements: maxReplacements) } - mutating func replace( - _ other: S, + mutating func replace( + _ other: C, with replacement: Replacement, maxReplacements: Int = .max - ) where S.Element == Element, Replacement.Element == Element { + ) where C.Element == Element, Replacement.Element == Element { self = replacing( other, with: replacement, diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index 8c7a9832d..4fd0081bb 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -266,10 +266,10 @@ extension BidirectionalCollection where Element: Equatable { } extension BidirectionalCollection where Element: Comparable { - func split( - by separator: S + func split( + by separator: C ) -> SplitCollection>> - where S.Element == Element + where C.Element == Element { split( by: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(separator)))) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index 73a5cd554..0d7bde56b 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -188,7 +188,7 @@ extension Collection where Element: Equatable { /// - Returns: A collection containing the elements of the collection that are /// not removed by `predicate`. @available(SwiftStdlib 5.7, *) - public func trimmingPrefix( + public func trimmingPrefix( _ prefix: Prefix ) -> SubSequence where Prefix.Element == Element { trimmingPrefix(FixedPatternConsumer(pattern: prefix)) @@ -202,7 +202,7 @@ extension Collection where SubSequence == Self, Element: Equatable { /// as its argument and returns a Boolean value indicating whether the /// element should be removed from the collection. @available(SwiftStdlib 5.7, *) - public mutating func trimPrefix( + public mutating func trimPrefix( _ prefix: Prefix ) where Prefix.Element == Element { trimPrefix(FixedPatternConsumer(pattern: prefix)) @@ -217,7 +217,7 @@ extension RangeReplaceableCollection where Element: Equatable { /// as its argument and returns a Boolean value indicating whether the /// element should be removed from the collection. @available(SwiftStdlib 5.7, *) - public mutating func trimPrefix( + public mutating func trimPrefix( _ prefix: Prefix ) where Prefix.Element == Element { trimPrefix(FixedPatternConsumer(pattern: prefix)) From b3dbd6992e592b866eb01ab3753939c29da23700 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Fri, 22 Apr 2022 19:20:27 -0500 Subject: [PATCH 4/8] Update trim(while:) - rethrowing and nonescaping This eliminates PredicateConsumer from the implementation so that the closure can throw. --- .../Algorithms/Algorithms/Trim.swift | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index 0d7bde56b..7411236da 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -102,21 +102,26 @@ extension RangeReplaceableCollection where Self: BidirectionalCollection { // MARK: Predicate algorithms extension Collection { - // TODO: Non-escaping and throwing - func trimmingPrefix( - while predicate: @escaping (Element) -> Bool - ) -> SubSequence { - trimmingPrefix(ManyConsumer(base: PredicateConsumer(predicate: predicate))) + fileprivate func endOfPrefix(while predicate: (Element) throws -> Bool) rethrows -> Index { + try firstIndex(where: { try !predicate($0) }) ?? endIndex + } + + @available(SwiftStdlib 5.7, *) + public func trimmingPrefix( + while predicate: (Element) throws -> Bool + ) rethrows -> SubSequence { + let end = try endOfPrefix(while: predicate) + return self[end...] } } extension Collection where SubSequence == Self { @available(SwiftStdlib 5.7, *) public mutating func trimPrefix( - while predicate: @escaping (Element) -> Bool - ) { - trimPrefix(ManyConsumer( - base: PredicateConsumer(predicate: predicate))) + while predicate: (Element) throws -> Bool + ) throws { + let end = try endOfPrefix(while: predicate) + self = self[end...] } } @@ -124,9 +129,10 @@ extension RangeReplaceableCollection { @_disfavoredOverload @available(SwiftStdlib 5.7, *) public mutating func trimPrefix( - while predicate: @escaping (Element) -> Bool - ) { - trimPrefix(ManyConsumer(base: PredicateConsumer(predicate: predicate))) + while predicate: (Element) throws -> Bool + ) rethrows { + let end = try endOfPrefix(while: predicate) + removeSubrange(startIndex.. Date: Fri, 22 Apr 2022 19:21:34 -0500 Subject: [PATCH 5/8] Add tests for trim methods --- Tests/RegexTests/AlgorithmsTests.swift | 55 ++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index 068546dd8..2d7483e71 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -24,6 +24,14 @@ func output(_ s: @autoclosure () -> T) { } } +func makeSingleUseSequence(element: T, count: Int) -> UnfoldSequence { + var count = count + return sequence(state: ()) { _ in + defer { count -= 1 } + return count > 0 ? element : nil + } +} + class RegexConsumerTests: XCTestCase { func testRanges() { func expectRanges( @@ -79,6 +87,53 @@ class RegexConsumerTests: XCTestCase { expectSplit("a", "", ["", "a", ""]) expectSplit("a", "x", ["a"]) expectSplit("a", "a", ["", ""]) + + func testTrim() { + func expectTrim( + _ string: String, + _ regex: String, + _ expected: Substring, + file: StaticString = #file, line: UInt = #line + ) { + let regex = try! Regex(regex) + let actual = string.trimmingPrefix(regex) + XCTAssertEqual(actual, expected, file: file, line: line) + } + + expectTrim("", "", "") + expectTrim("", "x", "") + expectTrim("a", "", "a") + expectTrim("a", "x", "a") + expectTrim("___a", "_", "__a") + expectTrim("___a", "_+", "a") + + XCTAssertEqual("".trimmingPrefix("a"), "") + XCTAssertEqual("a".trimmingPrefix("a"), "") + XCTAssertEqual("b".trimmingPrefix("a"), "b") + XCTAssertEqual("a".trimmingPrefix(""), "a") + XCTAssertEqual("___a".trimmingPrefix("_"), "__a") + XCTAssertEqual("___a".trimmingPrefix("___"), "a") + XCTAssertEqual("___a".trimmingPrefix("____"), "___a") + XCTAssertEqual("___a".trimmingPrefix("___a"), "") + + do { + let prefix = makeSingleUseSequence(element: "_" as Character, count: 5) + XCTAssertEqual("_____a".trimmingPrefix(prefix), "a") + XCTAssertEqual("_____a".trimmingPrefix(prefix), "_____a") + } + do { + let prefix = makeSingleUseSequence(element: "_" as Character, count: 5) + XCTAssertEqual("a".trimmingPrefix(prefix), "a") + // The result of this next call is technically undefined, so this + // is just to test that it doesn't crash. + XCTAssertNotEqual("_____a".trimmingPrefix(prefix), "") + } + + XCTAssertEqual("".trimmingPrefix(while: \.isWhitespace), "") + XCTAssertEqual("a".trimmingPrefix(while: \.isWhitespace), "a") + XCTAssertEqual(" ".trimmingPrefix(while: \.isWhitespace), "") + XCTAssertEqual(" a".trimmingPrefix(while: \.isWhitespace), "a") + XCTAssertEqual("a ".trimmingPrefix(while: \.isWhitespace), "a ") } func testReplace() { From 49335917d1788d7a0cd3cae1707d4db10f412e58 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Sat, 23 Apr 2022 00:56:45 -0500 Subject: [PATCH 6/8] Add maxSplits and omitEmpty to split methods This plumbs those parameters down into the SplitCollection type, and removes Collection conformance for now because (a) we aren't using it, and (b) it looks tricky to implement properly. --- .../Algorithms/Algorithms/Split.swift | 266 +++++++++++------- Tests/RegexTests/AlgorithmsTests.swift | 84 +++++- 2 files changed, 251 insertions(+), 99 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index 4fd0081bb..97d8c80dd 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -15,13 +15,28 @@ struct SplitCollection { public typealias Base = Searcher.Searched let ranges: RangesCollection - - init(ranges: RangesCollection) { + var maxSplits: Int + var omittingEmptySubsequences: Bool + + init( + ranges: RangesCollection, + maxSplits: Int, + omittingEmptySubsequences: Bool) + { self.ranges = ranges + self.maxSplits = maxSplits + self.omittingEmptySubsequences = omittingEmptySubsequences } - init(base: Base, searcher: Searcher) { + init( + base: Base, + searcher: Searcher, + maxSplits: Int, + omittingEmptySubsequences: Bool) + { self.ranges = base.ranges(of: searcher) + self.maxSplits = maxSplits + self.omittingEmptySubsequences = omittingEmptySubsequences } } @@ -30,97 +45,127 @@ extension SplitCollection: Sequence { let base: Base var index: Base.Index var ranges: RangesCollection.Iterator - var isDone: Bool - - init(ranges: RangesCollection) { + var maxSplits: Int + var omittingEmptySubsequences: Bool + + var splitCounter = 0 + var isDone = false + + init( + ranges: RangesCollection, + maxSplits: Int, + omittingEmptySubsequences: Bool + ) { self.base = ranges.base self.index = base.startIndex self.ranges = ranges.makeIterator() - self.isDone = false + self.maxSplits = maxSplits + self.omittingEmptySubsequences = omittingEmptySubsequences } public mutating func next() -> Base.SubSequence? { guard !isDone else { return nil } - guard let range = ranges.next() else { + /// Return the rest of base if it's non-empty or we're including + /// empty subsequences. + func finish() -> Base.SubSequence? { isDone = true - return base[index...] + return index == base.endIndex && omittingEmptySubsequences + ? nil + : base[index...] + } + + if splitCounter >= maxSplits { + return finish() } - defer { index = range.upperBound } - return base[index.. Iterator { - Iterator(ranges: ranges) - } -} - -extension SplitCollection: Collection { - public struct Index { - var start: Base.Index - var base: RangesCollection.Index - var isEndIndex: Bool - } - - public var startIndex: Index { - let base = ranges.startIndex - return Index(start: ranges.base.startIndex, base: base, isEndIndex: false) - } - - public var endIndex: Index { - Index(start: ranges.base.endIndex, base: ranges.endIndex, isEndIndex: true) - } - - public func formIndex(after index: inout Index) { - guard !index.isEndIndex else { fatalError("Cannot advance past endIndex") } - - if let range = index.base.range { - let newStart = range.upperBound - ranges.formIndex(after: &index.base) - index.start = newStart - } else { - index.isEndIndex = true - } - } - - public func index(after index: Index) -> Index { - var index = index - formIndex(after: &index) - return index - } - - public subscript(index: Index) -> Base.SubSequence { - guard !index.isEndIndex else { - fatalError("Cannot subscript using endIndex") - } - let end = index.base.range?.lowerBound ?? ranges.base.endIndex - return ranges.base[index.start.. Bool { - switch (lhs.isEndIndex, rhs.isEndIndex) { - case (false, false): - return lhs.start == rhs.start - case (let lhs, let rhs): - return lhs == rhs - } - } - - static func < (lhs: Self, rhs: Self) -> Bool { - switch (lhs.isEndIndex, rhs.isEndIndex) { - case (true, _): - return false - case (_, true): - return true - case (false, false): - return lhs.start < rhs.start - } - } -} +//extension SplitCollection: Collection { +// public struct Index { +// var start: Base.Index +// var base: RangesCollection.Index +// var isEndIndex: Bool +// } +// +// public var startIndex: Index { +// let base = ranges.startIndex +// return Index(start: ranges.base.startIndex, base: base, isEndIndex: false) +// } +// +// public var endIndex: Index { +// Index(start: ranges.base.endIndex, base: ranges.endIndex, isEndIndex: true) +// } +// +// public func formIndex(after index: inout Index) { +// guard !index.isEndIndex else { fatalError("Cannot advance past endIndex") } +// +// if let range = index.base.range { +// let newStart = range.upperBound +// ranges.formIndex(after: &index.base) +// index.start = newStart +// } else { +// index.isEndIndex = true +// } +// } +// +// public func index(after index: Index) -> Index { +// var index = index +// formIndex(after: &index) +// return index +// } +// +// public subscript(index: Index) -> Base.SubSequence { +// guard !index.isEndIndex else { +// fatalError("Cannot subscript using endIndex") +// } +// let end = index.base.range?.lowerBound ?? ranges.base.endIndex +// return ranges.base[index.start.. Bool { +// switch (lhs.isEndIndex, rhs.isEndIndex) { +// case (false, false): +// return lhs.start == rhs.start +// case (let lhs, let rhs): +// return lhs == rhs +// } +// } +// +// static func < (lhs: Self, rhs: Self) -> Bool { +// switch (lhs.isEndIndex, rhs.isEndIndex) { +// case (true, _): +// return false +// case (_, true): +// return true +// case (false, false): +// return lhs.start < rhs.start +// } +// } +//} // MARK: `ReversedSplitCollection` @@ -176,10 +221,15 @@ extension ReversedSplitCollection: Sequence { extension Collection { func split( - by separator: Searcher + by separator: Searcher, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection where Searcher.Searched == Self { - // TODO: `maxSplits`, `omittingEmptySubsequences`? - SplitCollection(base: self, searcher: separator) + SplitCollection( + base: self, + searcher: separator, + maxSplits: maxSplits, + omittingEmptySubsequences: omittingEmptySubsequences) } } @@ -198,9 +248,11 @@ extension BidirectionalCollection { extension Collection { // TODO: Non-escaping and throwing func split( - whereSeparator predicate: @escaping (Element) -> Bool + whereSeparator predicate: @escaping (Element) -> Bool, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection> { - split(by: PredicateConsumer(predicate: predicate)) + split(by: PredicateConsumer(predicate: predicate), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } } @@ -216,9 +268,11 @@ extension BidirectionalCollection where Element: Equatable { extension Collection where Element: Equatable { func split( - by separator: Element + by separator: Element, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection> { - split(whereSeparator: { $0 == separator }) + split(whereSeparator: { $0 == separator }, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } } @@ -234,10 +288,12 @@ extension BidirectionalCollection where Element: Equatable { extension Collection where Element: Equatable { @_disfavoredOverload - func split( - by separator: S - ) -> SplitCollection> where S.Element == Element { - split(by: ZSearcher(pattern: Array(separator), by: ==)) + func split( + by separator: C, + maxSplits: Int, + omittingEmptySubsequences: Bool + ) -> SplitCollection> where C.Element == Element { + split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } // FIXME: Return `some Collection` for SE-0346 @@ -247,10 +303,12 @@ extension Collection where Element: Equatable { /// - Returns: A collection of subsequences, split from this collection's /// elements. @available(SwiftStdlib 5.7, *) - public func split( - by separator: S - ) -> [SubSequence] where S.Element == Element { - Array(split(by: ZSearcher(pattern: Array(separator), by: ==))) + public func split( + separator: C, + maxSplits: Int = .max, + omittingEmptySubsequences: Bool = true + ) -> [SubSequence] where C.Element == Element { + Array(split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) } } @@ -267,12 +325,15 @@ extension BidirectionalCollection where Element: Equatable { extension BidirectionalCollection where Element: Comparable { func split( - by separator: C + by separator: C, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection>> where C.Element == Element { split( - by: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(separator)))) + by: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(separator))), + maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } // FIXME @@ -292,9 +353,11 @@ extension BidirectionalCollection where Element: Comparable { extension BidirectionalCollection where SubSequence == Substring { @_disfavoredOverload func split( - by separator: R + by separator: R, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection> { - split(by: RegexConsumer(separator)) + split(by: RegexConsumer(separator), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } func splitFromBack( @@ -303,15 +366,22 @@ extension BidirectionalCollection where SubSequence == Substring { splitFromBack(by: RegexConsumer(separator)) } - // FIXME: Return `some Collection` for SE-0346 + // TODO: Is this @_disfavoredOverload necessary? + // It prevents split(separator: String) from choosing this overload instead + // of the collection-based version when String has RegexComponent conformance + + // FIXME: Return `some Collection` for SE-0346 /// Returns the longest possible subsequences of the collection, in order, /// around elements equal to the given separator. /// - Parameter separator: A regex describing elements to be split upon. /// - Returns: A collection of substrings, split from this collection's /// elements. + @_disfavoredOverload public func split( - by separator: R + separator: R, + maxSplits: Int = .max, + omittingEmptySubsequences: Bool = true ) -> [SubSequence] { - Array(split(by: RegexConsumer(separator))) + Array(split(by: RegexConsumer(separator), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) } } diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index 2d7483e71..4616d07b9 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -78,7 +78,7 @@ class RegexConsumerTests: XCTestCase { file: StaticString = #file, line: UInt = #line ) { let regex = try! Regex(regex) - let actual = Array(string.split(by: regex)) + let actual = Array(string.split(separator: regex, omittingEmptySubsequences: false)) XCTAssertEqual(actual, expected, file: file, line: line) } @@ -87,6 +87,88 @@ class RegexConsumerTests: XCTestCase { expectSplit("a", "", ["", "a", ""]) expectSplit("a", "x", ["a"]) expectSplit("a", "a", ["", ""]) + expectSplit("a____a____a", "_+", ["a", "a", "a"]) + expectSplit("____a____a____a____", "_+", ["", "a", "a", "a", ""]) + } + + func testSplitPermutations() throws { + let splitRegex = try Regex(#"\|"#) + XCTAssertEqual( + "a|a|||a|a".split(separator: splitRegex), + ["a", "a", "a", "a"]) + XCTAssertEqual( + "a|a|||a|a".split(separator: splitRegex, omittingEmptySubsequences: false), + ["a", "a", "", "", "a", "a"]) + XCTAssertEqual( + "a|a|||a|a".split(separator: splitRegex, maxSplits: 2), + ["a", "a", "||a|a"]) + + XCTAssertEqual( + "a|a|||a|a|||a|a|||".split(separator: "|||"), + ["a|a", "a|a", "a|a"]) + XCTAssertEqual( + "a|a|||a|a|||a|a|||".split(separator: "|||", omittingEmptySubsequences: false), + ["a|a", "a|a", "a|a", ""]) + XCTAssertEqual( + "a|a|||a|a|||a|a|||".split(separator: "|||", maxSplits: 2), + ["a|a", "a|a", "a|a|||"]) + + XCTAssertEqual( + "aaaa".split(separator: ""), + ["a", "a", "a", "a"]) + XCTAssertEqual( + "aaaa".split(separator: "", omittingEmptySubsequences: false), + ["", "a", "a", "a", "a", ""]) + XCTAssertEqual( + "aaaa".split(separator: "", maxSplits: 2), + ["a", "a", "aa"]) + XCTAssertEqual( + "aaaa".split(separator: "", maxSplits: 2, omittingEmptySubsequences: false), + ["", "a", "aaa"]) + + // Fuzzing the input and parameters + for _ in 1...1_000 { + // Make strings that look like: + // "aaaaaaa" + // "|||aaaa||||" + // "a|a|aa|aa|" + // "|a||||aaa|a|||" + // "a|aa" + let keepCount = Int.random(in: 0...10) + let splitCount = Int.random(in: 0...10) + let str = [repeatElement("a", count: keepCount), repeatElement("|", count: splitCount)] + .joined() + .shuffled() + .joined() + + let omitEmpty = Bool.random() + let maxSplits = Bool.random() ? Int.max : Int.random(in: 0...10) + + // Use the stdlib behavior as the expected outcome + let expected = str.split( + separator: "|" as Character, + maxSplits: maxSplits, + omittingEmptySubsequences: omitEmpty) + let regexActual = str.split( + separator: splitRegex, + maxSplits: maxSplits, + omittingEmptySubsequences: omitEmpty) + let stringActual = str.split( + separator: "|" as String, + maxSplits: maxSplits, + omittingEmptySubsequences: omitEmpty) + XCTAssertEqual(regexActual, expected, """ + Mismatch in regex split of '\(str)', maxSplits: \(maxSplits), omitEmpty: \(omitEmpty) + expected: \(expected.map(String.init)) + actual: \(regexActual.map(String.init)) + """) + XCTAssertEqual(stringActual, expected, """ + Mismatch in string split of '\(str)', maxSplits: \(maxSplits), omitEmpty: \(omitEmpty) + expected: \(expected.map(String.init)) + actual: \(regexActual.map(String.init)) + """) + } + } func testTrim() { func expectTrim( From 709659bc59f51a0df45f0dac6f087f9800c70a99 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Sat, 23 Apr 2022 12:22:55 -0500 Subject: [PATCH 7/8] Add tests / fixes for contains / firstRange(of:) Better test coverage for both the regex and collection variants of these two methods, plus overloads for contains(_:) to win over Foundation's version from the overlay. Also fixes an error in TwoWaySearcher that tried to offset past the searched string's endIndex; might be a sign of something awry, but tests seem to be otherwise succeeding. --- .../Algorithms/Algorithms/Contains.swift | 14 ++++++ .../Algorithms/Searchers/TwoWaySearcher.swift | 9 +++- Tests/RegexTests/AlgorithmsTests.swift | 50 ++++++++++++++++++- 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift index 96414423a..2a1ef72a2 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift @@ -46,6 +46,20 @@ extension BidirectionalCollection where Element: Comparable { } } +// Overload breakers + +extension StringProtocol { + @available(SwiftStdlib 5.7, *) + public func contains(_ other: String) -> Bool { + firstRange(of: other) != nil + } + + @available(SwiftStdlib 5.7, *) + public func contains(_ other: Substring) -> Bool { + firstRange(of: other) != nil + } +} + // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { diff --git a/Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift b/Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift index 5530b4421..ae613cfd7 100644 --- a/Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift +++ b/Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift @@ -47,8 +47,10 @@ extension TwoWaySearcher: CollectionSearcher { for searched: Searched, in range: Range ) -> State { + // FIXME: Is this 'limitedBy' requirement a sign of error? let criticalIndex = searched.index( - range.lowerBound, offsetBy: criticalIndex) + range.lowerBound, offsetBy: criticalIndex, limitedBy: range.upperBound) + ?? range.upperBound return State( end: range.upperBound, index: range.lowerBound, @@ -66,7 +68,10 @@ extension TwoWaySearcher: CollectionSearcher { let start = _searchLeft(searched, &state, end) { state.index = end - state.criticalIndex = searched.index(end, offsetBy: criticalIndex) + // FIXME: Is this 'limitedBy' requirement a sign of error? + state.criticalIndex = searched.index( + end, offsetBy: criticalIndex, limitedBy: searched.endIndex) + ?? searched.endIndex state.memory = nil return start..(element: T, count: Int) -> UnfoldSequence } } -class RegexConsumerTests: XCTestCase { +class AlgorithmTests: XCTestCase { + func testContains() { + XCTAssertTrue("".contains("")) + XCTAssertTrue("abcde".contains("")) + XCTAssertTrue("abcde".contains("abcd")) + XCTAssertTrue("abcde".contains("bcde")) + XCTAssertTrue("abcde".contains("bcd")) + XCTAssertTrue("ababacabababa".contains("abababa")) + + XCTAssertFalse("".contains("abcd")) + + for start in 0..<9 { + for end in start..<9 { + XCTAssertTrue((0..<10).contains(start...end)) + XCTAssertFalse((0..<10).contains(start...10)) + } + } + } + func testRanges() { func expectRanges( _ string: String, @@ -48,6 +66,9 @@ class RegexConsumerTests: XCTestCase { // `IndexingIterator` tests the collection conformance let actualCol: [Range] = string[...].ranges(of: regex)[...].map(string.offsets(of:)) XCTAssertEqual(actualCol, expected, file: file, line: line) + + let firstRange = string.firstRange(of: regex).map(string.offsets(of:)) + XCTAssertEqual(firstRange, expected.first, file: file, line: line) } expectRanges("", "", [0..<0]) @@ -68,6 +89,31 @@ class RegexConsumerTests: XCTestCase { expectRanges("abc", "(a|b)*", [0..<2, 2..<2, 3..<3]) expectRanges("abc", "(b|c)+", [1..<3]) expectRanges("abc", "(b|c)*", [0..<0, 1..<3, 3..<3]) + + func expectStringRanges( + _ input: String, + _ pattern: String, + _ expected: [Range], + file: StaticString = #file, line: UInt = #line + ) { + let actualSeq: [Range] = input.ranges(of: pattern).map(input.offsets(of:)) + XCTAssertEqual(actualSeq, expected, file: file, line: line) + + // `IndexingIterator` tests the collection conformance + let actualCol: [Range] = input.ranges(of: pattern)[...].map(input.offsets(of:)) + XCTAssertEqual(actualCol, expected, file: file, line: line) + + let firstRange = input.firstRange(of: pattern).map(input.offsets(of:)) + XCTAssertEqual(firstRange, expected.first, file: file, line: line) + } + + expectStringRanges("", "", [0..<0]) + expectStringRanges("abcde", "", [0..<0, 1..<1, 2..<2, 3..<3, 4..<4, 5..<5]) + expectStringRanges("abcde", "abcd", [0..<4]) + expectStringRanges("abcde", "bcde", [1..<5]) + expectStringRanges("abcde", "bcd", [1..<4]) + expectStringRanges("ababacabababa", "abababa", [6..<13]) + expectStringRanges("ababacabababa", "aba", [0..<3, 6..<9, 10..<13]) } func testSplit() { From 9a394720132f8acfbe7e4cf54e2a204c58634e7e Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Sun, 24 Apr 2022 12:48:48 -0500 Subject: [PATCH 8/8] Test to ensure stdlib `split` is still accessible --- Tests/RegexTests/AlgorithmsTests.swift | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index c42ff175f..965e2c42e 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -135,6 +135,12 @@ class AlgorithmTests: XCTestCase { expectSplit("a", "a", ["", ""]) expectSplit("a____a____a", "_+", ["a", "a", "a"]) expectSplit("____a____a____a____", "_+", ["", "a", "a", "a", ""]) + + // Test that original `split` functions are still accessible + let splitRef = "abcd".split + XCTAssert(type(of: splitRef) == ((Character, Int, Bool) -> [Substring]).self) + let splitParamsRef = "abcd".split(separator:maxSplits:omittingEmptySubsequences:) + XCTAssert(type(of: splitParamsRef) == ((Character, Int, Bool) -> [Substring]).self) } func testSplitPermutations() throws {