diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift index 1d4332ad0..2a1ef72a2 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift @@ -28,16 +28,16 @@ extension Collection where Element: Equatable { /// - Returns: `true` if the collection contains the specified sequence, /// otherwise `false`. @available(SwiftStdlib 5.7, *) - public func contains(_ other: S) -> Bool - where S.Element == Element + public func contains(_ other: C) -> Bool + where C.Element == Element { firstRange(of: other) != nil } } extension BidirectionalCollection where Element: Comparable { - func contains(_ other: S) -> Bool - where S.Element == Element + func contains(_ other: C) -> Bool + where C.Element == Element { if #available(SwiftStdlib 5.7, *) { return firstRange(of: other) != nil @@ -46,6 +46,20 @@ extension BidirectionalCollection where Element: Comparable { } } +// Overload breakers + +extension StringProtocol { + @available(SwiftStdlib 5.7, *) + public func contains(_ other: String) -> Bool { + firstRange(of: other) != nil + } + + @available(SwiftStdlib 5.7, *) + public func contains(_ other: Substring) -> Bool { + firstRange(of: other) != nil + } +} + // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift index 508c04663..42703827e 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift @@ -32,31 +32,33 @@ extension BidirectionalCollection { // MARK: Fixed pattern algorithms extension Collection where Element: Equatable { - /// Finds and returns the range of the first occurrence of a given sequence - /// within the collection. - /// - Parameter sequence: The sequence to search for. + /// Finds and returns the range of the first occurrence of a given collection + /// within this collection. + /// + /// - Parameter other: The collection to search for. /// - Returns: A range in the collection of the first occurrence of `sequence`. /// Returns nil if `sequence` is not found. @available(SwiftStdlib 5.7, *) - public func firstRange( - of sequence: S - ) -> Range? where S.Element == Element { + public func firstRange( + of other: C + ) -> Range? where C.Element == Element { // TODO: Use a more efficient search algorithm - let searcher = ZSearcher(pattern: Array(sequence), by: ==) + let searcher = ZSearcher(pattern: Array(other), by: ==) return searcher.search(self[...], in: startIndex..( - of other: S - ) -> Range? where S.Element == Element { + public func firstRange( + of other: C + ) -> Range? where C.Element == Element { let searcher = PatternOrEmpty( searcher: TwoWaySearcher(pattern: Array(other))) let slice = self[...] diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 853c73271..33a9748ac 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -175,9 +175,9 @@ extension BidirectionalCollection { // MARK: Fixed pattern algorithms extension Collection where Element: Equatable { - func ranges( - of other: S - ) -> RangesCollection> where S.Element == Element { + func ranges( + of other: C + ) -> RangesCollection> where C.Element == Element { ranges(of: ZSearcher(pattern: Array(other), by: ==)) } @@ -188,9 +188,9 @@ extension Collection where Element: Equatable { /// - Returns: A collection of ranges of all occurrences of `other`. Returns /// an empty collection if `other` is not found. @available(SwiftStdlib 5.7, *) - public func ranges( - of other: S - ) -> [Range] where S.Element == Element { + public func ranges( + of other: C + ) -> [Range] where C.Element == Element { ranges(of: ZSearcher(pattern: Array(other), by: ==)).map { $0 } } } @@ -207,10 +207,10 @@ extension BidirectionalCollection where Element: Equatable { } extension BidirectionalCollection where Element: Comparable { - func ranges( - of other: S + func ranges( + of other: C ) -> RangesCollection>> - where S.Element == Element + where C.Element == Element { ranges(of: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(other)))) } @@ -247,6 +247,7 @@ extension BidirectionalCollection where SubSequence == Substring { // FIXME: Return `some Collection>` for SE-0346 /// Finds and returns the ranges of the all occurrences of a given sequence /// within the collection. + /// /// - Parameter regex: The regex to search for. /// - Returns: A collection or ranges in the receiver of all occurrences of /// `regex`. Returns an empty collection if `regex` is not found. diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index 4a6da6c10..217fb90d6 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -78,12 +78,12 @@ extension RangeReplaceableCollection where Element: Equatable { /// - Returns: A new collection in which all occurrences of `other` in /// `subrange` of the collection are replaced by `replacement`. @available(SwiftStdlib 5.7, *) - public func replacing( - _ other: S, + public func replacing( + _ other: C, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max - ) -> Self where S.Element == Element, Replacement.Element == Element { + ) -> Self where C.Element == Element, Replacement.Element == Element { replacing( ZSearcher(pattern: Array(other), by: ==), with: replacement, @@ -101,11 +101,11 @@ extension RangeReplaceableCollection where Element: Equatable { /// - Returns: A new collection in which all occurrences of `other` in /// `subrange` of the collection are replaced by `replacement`. @available(SwiftStdlib 5.7, *) - public func replacing( - _ other: S, + public func replacing( + _ other: C, with replacement: Replacement, maxReplacements: Int = .max - ) -> Self where S.Element == Element, Replacement.Element == Element { + ) -> Self where C.Element == Element, Replacement.Element == Element { replacing( other, with: replacement, @@ -120,11 +120,11 @@ extension RangeReplaceableCollection where Element: Equatable { /// - maxReplacements: A number specifying how many occurrences of `other` /// to replace. Default is `Int.max`. @available(SwiftStdlib 5.7, *) - public mutating func replace( - _ other: S, + public mutating func replace( + _ other: C, with replacement: Replacement, maxReplacements: Int = .max - ) where S.Element == Element, Replacement.Element == Element { + ) where C.Element == Element, Replacement.Element == Element { self = replacing( other, with: replacement, @@ -136,12 +136,12 @@ extension RangeReplaceableCollection where Element: Equatable { extension RangeReplaceableCollection where Self: BidirectionalCollection, Element: Comparable { - func replacing( - _ other: S, + func replacing( + _ other: C, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max - ) -> Self where S.Element == Element, Replacement.Element == Element { + ) -> Self where C.Element == Element, Replacement.Element == Element { replacing( PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(other))), with: replacement, @@ -149,11 +149,11 @@ extension RangeReplaceableCollection maxReplacements: maxReplacements) } - func replacing( - _ other: S, + func replacing( + _ other: C, with replacement: Replacement, maxReplacements: Int = .max - ) -> Self where S.Element == Element, Replacement.Element == Element { + ) -> Self where C.Element == Element, Replacement.Element == Element { replacing( other, with: replacement, @@ -161,11 +161,11 @@ extension RangeReplaceableCollection maxReplacements: maxReplacements) } - mutating func replace( - _ other: S, + mutating func replace( + _ other: C, with replacement: Replacement, maxReplacements: Int = .max - ) where S.Element == Element, Replacement.Element == Element { + ) where C.Element == Element, Replacement.Element == Element { self = replacing( other, with: replacement, diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index 8c7a9832d..97d8c80dd 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -15,13 +15,28 @@ struct SplitCollection { public typealias Base = Searcher.Searched let ranges: RangesCollection - - init(ranges: RangesCollection) { + var maxSplits: Int + var omittingEmptySubsequences: Bool + + init( + ranges: RangesCollection, + maxSplits: Int, + omittingEmptySubsequences: Bool) + { self.ranges = ranges + self.maxSplits = maxSplits + self.omittingEmptySubsequences = omittingEmptySubsequences } - init(base: Base, searcher: Searcher) { + init( + base: Base, + searcher: Searcher, + maxSplits: Int, + omittingEmptySubsequences: Bool) + { self.ranges = base.ranges(of: searcher) + self.maxSplits = maxSplits + self.omittingEmptySubsequences = omittingEmptySubsequences } } @@ -30,97 +45,127 @@ extension SplitCollection: Sequence { let base: Base var index: Base.Index var ranges: RangesCollection.Iterator - var isDone: Bool - - init(ranges: RangesCollection) { + var maxSplits: Int + var omittingEmptySubsequences: Bool + + var splitCounter = 0 + var isDone = false + + init( + ranges: RangesCollection, + maxSplits: Int, + omittingEmptySubsequences: Bool + ) { self.base = ranges.base self.index = base.startIndex self.ranges = ranges.makeIterator() - self.isDone = false + self.maxSplits = maxSplits + self.omittingEmptySubsequences = omittingEmptySubsequences } public mutating func next() -> Base.SubSequence? { guard !isDone else { return nil } - guard let range = ranges.next() else { + /// Return the rest of base if it's non-empty or we're including + /// empty subsequences. + func finish() -> Base.SubSequence? { isDone = true - return base[index...] + return index == base.endIndex && omittingEmptySubsequences + ? nil + : base[index...] } - defer { index = range.upperBound } - return base[index..= maxSplits { + return finish() + } + + while true { + // If there are no more ranges that matched, return the rest of `base`. + guard let range = ranges.next() else { + return finish() + } + + defer { index = range.upperBound } + + if omittingEmptySubsequences && index == range.lowerBound { + continue + } + + splitCounter += 1 + return base[index.. Iterator { - Iterator(ranges: ranges) - } -} - -extension SplitCollection: Collection { - public struct Index { - var start: Base.Index - var base: RangesCollection.Index - var isEndIndex: Bool - } - - public var startIndex: Index { - let base = ranges.startIndex - return Index(start: ranges.base.startIndex, base: base, isEndIndex: false) - } - - public var endIndex: Index { - Index(start: ranges.base.endIndex, base: ranges.endIndex, isEndIndex: true) - } - - public func formIndex(after index: inout Index) { - guard !index.isEndIndex else { fatalError("Cannot advance past endIndex") } - - if let range = index.base.range { - let newStart = range.upperBound - ranges.formIndex(after: &index.base) - index.start = newStart - } else { - index.isEndIndex = true - } - } - - public func index(after index: Index) -> Index { - var index = index - formIndex(after: &index) - return index - } - - public subscript(index: Index) -> Base.SubSequence { - guard !index.isEndIndex else { - fatalError("Cannot subscript using endIndex") - } - let end = index.base.range?.lowerBound ?? ranges.base.endIndex - return ranges.base[index.start.. Bool { - switch (lhs.isEndIndex, rhs.isEndIndex) { - case (false, false): - return lhs.start == rhs.start - case (let lhs, let rhs): - return lhs == rhs - } - } - - static func < (lhs: Self, rhs: Self) -> Bool { - switch (lhs.isEndIndex, rhs.isEndIndex) { - case (true, _): - return false - case (_, true): - return true - case (false, false): - return lhs.start < rhs.start - } - } -} +//extension SplitCollection: Collection { +// public struct Index { +// var start: Base.Index +// var base: RangesCollection.Index +// var isEndIndex: Bool +// } +// +// public var startIndex: Index { +// let base = ranges.startIndex +// return Index(start: ranges.base.startIndex, base: base, isEndIndex: false) +// } +// +// public var endIndex: Index { +// Index(start: ranges.base.endIndex, base: ranges.endIndex, isEndIndex: true) +// } +// +// public func formIndex(after index: inout Index) { +// guard !index.isEndIndex else { fatalError("Cannot advance past endIndex") } +// +// if let range = index.base.range { +// let newStart = range.upperBound +// ranges.formIndex(after: &index.base) +// index.start = newStart +// } else { +// index.isEndIndex = true +// } +// } +// +// public func index(after index: Index) -> Index { +// var index = index +// formIndex(after: &index) +// return index +// } +// +// public subscript(index: Index) -> Base.SubSequence { +// guard !index.isEndIndex else { +// fatalError("Cannot subscript using endIndex") +// } +// let end = index.base.range?.lowerBound ?? ranges.base.endIndex +// return ranges.base[index.start.. Bool { +// switch (lhs.isEndIndex, rhs.isEndIndex) { +// case (false, false): +// return lhs.start == rhs.start +// case (let lhs, let rhs): +// return lhs == rhs +// } +// } +// +// static func < (lhs: Self, rhs: Self) -> Bool { +// switch (lhs.isEndIndex, rhs.isEndIndex) { +// case (true, _): +// return false +// case (_, true): +// return true +// case (false, false): +// return lhs.start < rhs.start +// } +// } +//} // MARK: `ReversedSplitCollection` @@ -176,10 +221,15 @@ extension ReversedSplitCollection: Sequence { extension Collection { func split( - by separator: Searcher + by separator: Searcher, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection where Searcher.Searched == Self { - // TODO: `maxSplits`, `omittingEmptySubsequences`? - SplitCollection(base: self, searcher: separator) + SplitCollection( + base: self, + searcher: separator, + maxSplits: maxSplits, + omittingEmptySubsequences: omittingEmptySubsequences) } } @@ -198,9 +248,11 @@ extension BidirectionalCollection { extension Collection { // TODO: Non-escaping and throwing func split( - whereSeparator predicate: @escaping (Element) -> Bool + whereSeparator predicate: @escaping (Element) -> Bool, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection> { - split(by: PredicateConsumer(predicate: predicate)) + split(by: PredicateConsumer(predicate: predicate), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } } @@ -216,9 +268,11 @@ extension BidirectionalCollection where Element: Equatable { extension Collection where Element: Equatable { func split( - by separator: Element + by separator: Element, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection> { - split(whereSeparator: { $0 == separator }) + split(whereSeparator: { $0 == separator }, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } } @@ -234,10 +288,12 @@ extension BidirectionalCollection where Element: Equatable { extension Collection where Element: Equatable { @_disfavoredOverload - func split( - by separator: S - ) -> SplitCollection> where S.Element == Element { - split(by: ZSearcher(pattern: Array(separator), by: ==)) + func split( + by separator: C, + maxSplits: Int, + omittingEmptySubsequences: Bool + ) -> SplitCollection> where C.Element == Element { + split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } // FIXME: Return `some Collection` for SE-0346 @@ -247,10 +303,12 @@ extension Collection where Element: Equatable { /// - Returns: A collection of subsequences, split from this collection's /// elements. @available(SwiftStdlib 5.7, *) - public func split( - by separator: S - ) -> [SubSequence] where S.Element == Element { - Array(split(by: ZSearcher(pattern: Array(separator), by: ==))) + public func split( + separator: C, + maxSplits: Int = .max, + omittingEmptySubsequences: Bool = true + ) -> [SubSequence] where C.Element == Element { + Array(split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) } } @@ -266,13 +324,16 @@ extension BidirectionalCollection where Element: Equatable { } extension BidirectionalCollection where Element: Comparable { - func split( - by separator: S + func split( + by separator: C, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection>> - where S.Element == Element + where C.Element == Element { split( - by: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(separator)))) + by: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(separator))), + maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } // FIXME @@ -292,9 +353,11 @@ extension BidirectionalCollection where Element: Comparable { extension BidirectionalCollection where SubSequence == Substring { @_disfavoredOverload func split( - by separator: R + by separator: R, + maxSplits: Int, + omittingEmptySubsequences: Bool ) -> SplitCollection> { - split(by: RegexConsumer(separator)) + split(by: RegexConsumer(separator), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } func splitFromBack( @@ -303,15 +366,22 @@ extension BidirectionalCollection where SubSequence == Substring { splitFromBack(by: RegexConsumer(separator)) } - // FIXME: Return `some Collection` for SE-0346 + // TODO: Is this @_disfavoredOverload necessary? + // It prevents split(separator: String) from choosing this overload instead + // of the collection-based version when String has RegexComponent conformance + + // FIXME: Return `some Collection` for SE-0346 /// Returns the longest possible subsequences of the collection, in order, /// around elements equal to the given separator. /// - Parameter separator: A regex describing elements to be split upon. /// - Returns: A collection of substrings, split from this collection's /// elements. + @_disfavoredOverload public func split( - by separator: R + separator: R, + maxSplits: Int = .max, + omittingEmptySubsequences: Bool = true ) -> [SubSequence] { - Array(split(by: RegexConsumer(separator))) + Array(split(by: RegexConsumer(separator), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index 73a5cd554..7411236da 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -102,21 +102,26 @@ extension RangeReplaceableCollection where Self: BidirectionalCollection { // MARK: Predicate algorithms extension Collection { - // TODO: Non-escaping and throwing - func trimmingPrefix( - while predicate: @escaping (Element) -> Bool - ) -> SubSequence { - trimmingPrefix(ManyConsumer(base: PredicateConsumer(predicate: predicate))) + fileprivate func endOfPrefix(while predicate: (Element) throws -> Bool) rethrows -> Index { + try firstIndex(where: { try !predicate($0) }) ?? endIndex + } + + @available(SwiftStdlib 5.7, *) + public func trimmingPrefix( + while predicate: (Element) throws -> Bool + ) rethrows -> SubSequence { + let end = try endOfPrefix(while: predicate) + return self[end...] } } extension Collection where SubSequence == Self { @available(SwiftStdlib 5.7, *) public mutating func trimPrefix( - while predicate: @escaping (Element) -> Bool - ) { - trimPrefix(ManyConsumer( - base: PredicateConsumer(predicate: predicate))) + while predicate: (Element) throws -> Bool + ) throws { + let end = try endOfPrefix(while: predicate) + self = self[end...] } } @@ -124,9 +129,10 @@ extension RangeReplaceableCollection { @_disfavoredOverload @available(SwiftStdlib 5.7, *) public mutating func trimPrefix( - while predicate: @escaping (Element) -> Bool - ) { - trimPrefix(ManyConsumer(base: PredicateConsumer(predicate: predicate))) + while predicate: (Element) throws -> Bool + ) rethrows { + let end = try endOfPrefix(while: predicate) + removeSubrange(startIndex..( + public func trimmingPrefix( _ prefix: Prefix ) -> SubSequence where Prefix.Element == Element { trimmingPrefix(FixedPatternConsumer(pattern: prefix)) @@ -202,7 +208,7 @@ extension Collection where SubSequence == Self, Element: Equatable { /// as its argument and returns a Boolean value indicating whether the /// element should be removed from the collection. @available(SwiftStdlib 5.7, *) - public mutating func trimPrefix( + public mutating func trimPrefix( _ prefix: Prefix ) where Prefix.Element == Element { trimPrefix(FixedPatternConsumer(pattern: prefix)) @@ -217,7 +223,7 @@ extension RangeReplaceableCollection where Element: Equatable { /// as its argument and returns a Boolean value indicating whether the /// element should be removed from the collection. @available(SwiftStdlib 5.7, *) - public mutating func trimPrefix( + public mutating func trimPrefix( _ prefix: Prefix ) where Prefix.Element == Element { trimPrefix(FixedPatternConsumer(pattern: prefix)) diff --git a/Sources/_StringProcessing/Algorithms/Consumers/FixedPatternConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/FixedPatternConsumer.swift index e611f477a..8312c247a 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/FixedPatternConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/FixedPatternConsumer.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -struct FixedPatternConsumer +struct FixedPatternConsumer where Consumed.Element: Equatable, Pattern.Element == Consumed.Element { let pattern: Pattern @@ -21,20 +21,17 @@ extension FixedPatternConsumer: CollectionConsumer { in range: Range ) -> Consumed.Index? { var index = range.lowerBound - var patternIndex = pattern.startIndex + var patternIterator = pattern.makeIterator() - while true { - if patternIndex == pattern.endIndex { - return index - } - - if index == range.upperBound || consumed[index] != pattern[patternIndex] { + while let element = patternIterator.next() { + if index == range.upperBound || consumed[index] != element { return nil } consumed.formIndex(after: &index) - pattern.formIndex(after: &patternIndex) } + + return index } } diff --git a/Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift b/Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift index 5530b4421..ae613cfd7 100644 --- a/Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift +++ b/Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift @@ -47,8 +47,10 @@ extension TwoWaySearcher: CollectionSearcher { for searched: Searched, in range: Range ) -> State { + // FIXME: Is this 'limitedBy' requirement a sign of error? let criticalIndex = searched.index( - range.lowerBound, offsetBy: criticalIndex) + range.lowerBound, offsetBy: criticalIndex, limitedBy: range.upperBound) + ?? range.upperBound return State( end: range.upperBound, index: range.lowerBound, @@ -66,7 +68,10 @@ extension TwoWaySearcher: CollectionSearcher { let start = _searchLeft(searched, &state, end) { state.index = end - state.criticalIndex = searched.index(end, offsetBy: criticalIndex) + // FIXME: Is this 'limitedBy' requirement a sign of error? + state.criticalIndex = searched.index( + end, offsetBy: criticalIndex, limitedBy: searched.endIndex) + ?? searched.endIndex state.memory = nil return start..( diff --git a/Tests/RegexTests/AlgorithmsInternalsTests.swift b/Tests/RegexTests/AlgorithmsInternalsTests.swift new file mode 100644 index 000000000..f0d556744 --- /dev/null +++ b/Tests/RegexTests/AlgorithmsInternalsTests.swift @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +@testable import _StringProcessing +import XCTest + +// TODO: Protocol-powered testing +extension AlgorithmTests { + func testAdHoc() { + let r = try! Regex("a|b+") + + XCTAssert("palindrome".contains(r)) + XCTAssert("botany".contains(r)) + XCTAssert("antiquing".contains(r)) + XCTAssertFalse("cdef".contains(r)) + + let str = "a string with the letter b in it" + let first = str.firstRange(of: r) + let last = str.lastRange(of: r) + let (expectFirst, expectLast) = ( + str.index(atOffset: 0)..(_ s: @autoclosure () -> T) { } } -class RegexConsumerTests: XCTestCase { +func makeSingleUseSequence(element: T, count: Int) -> UnfoldSequence { + var count = count + return sequence(state: ()) { _ in + defer { count -= 1 } + return count > 0 ? element : nil + } +} + +class AlgorithmTests: XCTestCase { + func testContains() { + XCTAssertTrue("".contains("")) + XCTAssertTrue("abcde".contains("")) + XCTAssertTrue("abcde".contains("abcd")) + XCTAssertTrue("abcde".contains("bcde")) + XCTAssertTrue("abcde".contains("bcd")) + XCTAssertTrue("ababacabababa".contains("abababa")) + + XCTAssertFalse("".contains("abcd")) + + for start in 0..<9 { + for end in start..<9 { + XCTAssertTrue((0..<10).contains(start...end)) + XCTAssertFalse((0..<10).contains(start...10)) + } + } + } + func testRanges() { func expectRanges( _ string: String, @@ -40,6 +66,9 @@ class RegexConsumerTests: XCTestCase { // `IndexingIterator` tests the collection conformance let actualCol: [Range] = string[...].ranges(of: regex)[...].map(string.offsets(of:)) XCTAssertEqual(actualCol, expected, file: file, line: line) + + let firstRange = string.firstRange(of: regex).map(string.offsets(of:)) + XCTAssertEqual(firstRange, expected.first, file: file, line: line) } expectRanges("", "", [0..<0]) @@ -60,6 +89,31 @@ class RegexConsumerTests: XCTestCase { expectRanges("abc", "(a|b)*", [0..<2, 2..<2, 3..<3]) expectRanges("abc", "(b|c)+", [1..<3]) expectRanges("abc", "(b|c)*", [0..<0, 1..<3, 3..<3]) + + func expectStringRanges( + _ input: String, + _ pattern: String, + _ expected: [Range], + file: StaticString = #file, line: UInt = #line + ) { + let actualSeq: [Range] = input.ranges(of: pattern).map(input.offsets(of:)) + XCTAssertEqual(actualSeq, expected, file: file, line: line) + + // `IndexingIterator` tests the collection conformance + let actualCol: [Range] = input.ranges(of: pattern)[...].map(input.offsets(of:)) + XCTAssertEqual(actualCol, expected, file: file, line: line) + + let firstRange = input.firstRange(of: pattern).map(input.offsets(of:)) + XCTAssertEqual(firstRange, expected.first, file: file, line: line) + } + + expectStringRanges("", "", [0..<0]) + expectStringRanges("abcde", "", [0..<0, 1..<1, 2..<2, 3..<3, 4..<4, 5..<5]) + expectStringRanges("abcde", "abcd", [0..<4]) + expectStringRanges("abcde", "bcde", [1..<5]) + expectStringRanges("abcde", "bcd", [1..<4]) + expectStringRanges("ababacabababa", "abababa", [6..<13]) + expectStringRanges("ababacabababa", "aba", [0..<3, 6..<9, 10..<13]) } func testSplit() { @@ -70,7 +124,7 @@ class RegexConsumerTests: XCTestCase { file: StaticString = #file, line: UInt = #line ) { let regex = try! Regex(regex) - let actual = Array(string.split(by: regex)) + let actual = Array(string.split(separator: regex, omittingEmptySubsequences: false)) XCTAssertEqual(actual, expected, file: file, line: line) } @@ -79,6 +133,141 @@ class RegexConsumerTests: XCTestCase { expectSplit("a", "", ["", "a", ""]) expectSplit("a", "x", ["a"]) expectSplit("a", "a", ["", ""]) + expectSplit("a____a____a", "_+", ["a", "a", "a"]) + expectSplit("____a____a____a____", "_+", ["", "a", "a", "a", ""]) + + // Test that original `split` functions are still accessible + let splitRef = "abcd".split + XCTAssert(type(of: splitRef) == ((Character, Int, Bool) -> [Substring]).self) + let splitParamsRef = "abcd".split(separator:maxSplits:omittingEmptySubsequences:) + XCTAssert(type(of: splitParamsRef) == ((Character, Int, Bool) -> [Substring]).self) + } + + func testSplitPermutations() throws { + let splitRegex = try Regex(#"\|"#) + XCTAssertEqual( + "a|a|||a|a".split(separator: splitRegex), + ["a", "a", "a", "a"]) + XCTAssertEqual( + "a|a|||a|a".split(separator: splitRegex, omittingEmptySubsequences: false), + ["a", "a", "", "", "a", "a"]) + XCTAssertEqual( + "a|a|||a|a".split(separator: splitRegex, maxSplits: 2), + ["a", "a", "||a|a"]) + + XCTAssertEqual( + "a|a|||a|a|||a|a|||".split(separator: "|||"), + ["a|a", "a|a", "a|a"]) + XCTAssertEqual( + "a|a|||a|a|||a|a|||".split(separator: "|||", omittingEmptySubsequences: false), + ["a|a", "a|a", "a|a", ""]) + XCTAssertEqual( + "a|a|||a|a|||a|a|||".split(separator: "|||", maxSplits: 2), + ["a|a", "a|a", "a|a|||"]) + + XCTAssertEqual( + "aaaa".split(separator: ""), + ["a", "a", "a", "a"]) + XCTAssertEqual( + "aaaa".split(separator: "", omittingEmptySubsequences: false), + ["", "a", "a", "a", "a", ""]) + XCTAssertEqual( + "aaaa".split(separator: "", maxSplits: 2), + ["a", "a", "aa"]) + XCTAssertEqual( + "aaaa".split(separator: "", maxSplits: 2, omittingEmptySubsequences: false), + ["", "a", "aaa"]) + + // Fuzzing the input and parameters + for _ in 1...1_000 { + // Make strings that look like: + // "aaaaaaa" + // "|||aaaa||||" + // "a|a|aa|aa|" + // "|a||||aaa|a|||" + // "a|aa" + let keepCount = Int.random(in: 0...10) + let splitCount = Int.random(in: 0...10) + let str = [repeatElement("a", count: keepCount), repeatElement("|", count: splitCount)] + .joined() + .shuffled() + .joined() + + let omitEmpty = Bool.random() + let maxSplits = Bool.random() ? Int.max : Int.random(in: 0...10) + + // Use the stdlib behavior as the expected outcome + let expected = str.split( + separator: "|" as Character, + maxSplits: maxSplits, + omittingEmptySubsequences: omitEmpty) + let regexActual = str.split( + separator: splitRegex, + maxSplits: maxSplits, + omittingEmptySubsequences: omitEmpty) + let stringActual = str.split( + separator: "|" as String, + maxSplits: maxSplits, + omittingEmptySubsequences: omitEmpty) + XCTAssertEqual(regexActual, expected, """ + Mismatch in regex split of '\(str)', maxSplits: \(maxSplits), omitEmpty: \(omitEmpty) + expected: \(expected.map(String.init)) + actual: \(regexActual.map(String.init)) + """) + XCTAssertEqual(stringActual, expected, """ + Mismatch in string split of '\(str)', maxSplits: \(maxSplits), omitEmpty: \(omitEmpty) + expected: \(expected.map(String.init)) + actual: \(regexActual.map(String.init)) + """) + } + } + + func testTrim() { + func expectTrim( + _ string: String, + _ regex: String, + _ expected: Substring, + file: StaticString = #file, line: UInt = #line + ) { + let regex = try! Regex(regex) + let actual = string.trimmingPrefix(regex) + XCTAssertEqual(actual, expected, file: file, line: line) + } + + expectTrim("", "", "") + expectTrim("", "x", "") + expectTrim("a", "", "a") + expectTrim("a", "x", "a") + expectTrim("___a", "_", "__a") + expectTrim("___a", "_+", "a") + + XCTAssertEqual("".trimmingPrefix("a"), "") + XCTAssertEqual("a".trimmingPrefix("a"), "") + XCTAssertEqual("b".trimmingPrefix("a"), "b") + XCTAssertEqual("a".trimmingPrefix(""), "a") + XCTAssertEqual("___a".trimmingPrefix("_"), "__a") + XCTAssertEqual("___a".trimmingPrefix("___"), "a") + XCTAssertEqual("___a".trimmingPrefix("____"), "___a") + XCTAssertEqual("___a".trimmingPrefix("___a"), "") + + do { + let prefix = makeSingleUseSequence(element: "_" as Character, count: 5) + XCTAssertEqual("_____a".trimmingPrefix(prefix), "a") + XCTAssertEqual("_____a".trimmingPrefix(prefix), "_____a") + } + do { + let prefix = makeSingleUseSequence(element: "_" as Character, count: 5) + XCTAssertEqual("a".trimmingPrefix(prefix), "a") + // The result of this next call is technically undefined, so this + // is just to test that it doesn't crash. + XCTAssertNotEqual("_____a".trimmingPrefix(prefix), "") + } + + XCTAssertEqual("".trimmingPrefix(while: \.isWhitespace), "") + XCTAssertEqual("a".trimmingPrefix(while: \.isWhitespace), "a") + XCTAssertEqual(" ".trimmingPrefix(while: \.isWhitespace), "") + XCTAssertEqual(" a".trimmingPrefix(while: \.isWhitespace), "a") + XCTAssertEqual("a ".trimmingPrefix(while: \.isWhitespace), "a ") } func testReplace() { @@ -107,37 +296,6 @@ class RegexConsumerTests: XCTestCase { expectReplace("aab", "a*", "X", "XXbX") } - func testAdHoc() { - let r = try! Regex("a|b+") - - XCTAssert("palindrome".contains(r)) - XCTAssert("botany".contains(r)) - XCTAssert("antiquing".contains(r)) - XCTAssertFalse("cdef".contains(r)) - - let str = "a string with the letter b in it" - let first = str.firstRange(of: r) - let last = str.lastRange(of: r) - let (expectFirst, expectLast) = ( - str.index(atOffset: 0)..