diff --git a/Sources/RegexBuilder/Algorithms.swift b/Sources/RegexBuilder/Algorithms.swift index f1f6d97a0..88916879b 100644 --- a/Sources/RegexBuilder/Algorithms.swift +++ b/Sources/RegexBuilder/Algorithms.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -import _StringProcessing +@_spi(RegexBuilder) import _StringProcessing // FIXME(rdar://92459215): We should be using 'some RegexComponent' instead of // for the methods below that don't impose any additional @@ -313,3 +313,31 @@ where Self: BidirectionalCollection, SubSequence == Substring { try replace(content(), maxReplacements: maxReplacements, with: replacement) } } + +// String split overload breakers + +extension StringProtocol where SubSequence == Substring { + @available(SwiftStdlib 5.7, *) + public func split( + separator: String, + maxSplits: Int = .max, + omittingEmptySubsequences: Bool = true + ) -> [Substring] { + return _split( + separator: separator, + maxSplits: maxSplits, + omittingEmptySubsequences: omittingEmptySubsequences) + } + + @available(SwiftStdlib 5.7, *) + public func split( + separator: Substring, + maxSplits: Int = .max, + omittingEmptySubsequences: Bool = true + ) -> [Substring] { + return _split( + separator: separator, + maxSplits: maxSplits, + omittingEmptySubsequences: omittingEmptySubsequences) + } +} diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift index bb86f4676..020ea8208 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift @@ -27,6 +27,7 @@ extension Collection where Element: Equatable { /// - Parameter other: A sequence to search for within this collection. /// - Returns: `true` if the collection contains the specified sequence, /// otherwise `false`. + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func contains(_ other: C) -> Bool where C.Element == Element @@ -49,11 +50,13 @@ extension BidirectionalCollection where Element: Comparable { // Overload breakers extension StringProtocol { + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func contains(_ other: String) -> Bool { firstRange(of: other) != nil } + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func contains(_ other: Substring) -> Bool { firstRange(of: other) != nil @@ -68,6 +71,7 @@ extension BidirectionalCollection where SubSequence == Substring { /// - Parameter regex: A regex to search for within this collection. /// - Returns: `true` if the regex was found in the collection, otherwise /// `false`. + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func contains(_ regex: R) -> Bool { _contains(RegexConsumer(regex)) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift index ad5c535d4..30c2fac92 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift @@ -75,6 +75,7 @@ extension BidirectionalCollection where SubSequence == Substring { /// - Parameter regex: The regex to search for. /// - Returns: A range in the collection of the first occurrence of `regex`. /// Returns `nil` if `regex` is not found. + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func firstRange(of regex: R) -> Range? { _firstRange(of: RegexConsumer(regex)) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 9f3c50c7c..578c499d1 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -251,6 +251,7 @@ extension BidirectionalCollection where SubSequence == Substring { /// - Parameter regex: The regex to search for. /// - Returns: A collection or ranges in the receiver of all occurrences of /// `regex`. Returns an empty collection if `regex` is not found. + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func ranges( of regex: R diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index daf726fe7..5c2bc035f 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -210,6 +210,7 @@ extension RangeReplaceableCollection where SubSequence == Substring { /// sequence matching `regex` to replace. Default is `Int.max`. /// - Returns: A new collection in which all occurrences of subsequence /// matching `regex` are replaced by `replacement`. + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func replacing( _ regex: R, diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index 412197557..16cc47c39 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -307,13 +307,17 @@ extension Collection where Element: Equatable { /// - Parameter separator: The element to be split upon. /// - Returns: A collection of subsequences, split from this collection's /// elements. + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func split( separator: C, maxSplits: Int = .max, omittingEmptySubsequences: Bool = true ) -> [SubSequence] where C.Element == Element { - Array(split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) + Array(split( + by: ZSearcher(pattern: Array(separator), by: ==), + maxSplits: maxSplits, + omittingEmptySubsequences: omittingEmptySubsequences)) } } @@ -352,6 +356,45 @@ extension BidirectionalCollection where Element: Comparable { // } } +// String split overload breakers +// +// These are underscored and marked as SPI so that the *actual* public overloads +// are only visible in RegexBuilder, to avoid breaking source with the +// standard library's function of the same name that takes a `Character` +// as the separator. *Those* overloads are necessary as tie-breakers between +// the Collection-based and Regex-based `split`s, which in turn are both marked +// @_disfavoredOverload to avoid the wrong overload being selected when a +// collection's element type could be used interchangably with a collection of +// that element (e.g. `Array.split(separator: [])`). + +extension StringProtocol where SubSequence == Substring { + @_spi(RegexBuilder) + @available(SwiftStdlib 5.7, *) + public func _split( + separator: String, + maxSplits: Int = .max, + omittingEmptySubsequences: Bool = true + ) -> [Substring] { + Array(split( + by: ZSearcher(pattern: Array(separator), by: ==), + maxSplits: maxSplits, + omittingEmptySubsequences: omittingEmptySubsequences)) + } + + @_spi(RegexBuilder) + @available(SwiftStdlib 5.7, *) + public func _split( + separator: Substring, + maxSplits: Int = .max, + omittingEmptySubsequences: Bool = true + ) -> [Substring] { + Array(split( + by: ZSearcher(pattern: Array(separator), by: ==), + maxSplits: maxSplits, + omittingEmptySubsequences: omittingEmptySubsequences)) + } +} + // MARK: Regex algorithms @available(SwiftStdlib 5.7, *) @@ -388,6 +431,9 @@ extension BidirectionalCollection where SubSequence == Substring { maxSplits: Int = .max, omittingEmptySubsequences: Bool = true ) -> [SubSequence] { - Array(split(by: RegexConsumer(separator), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) + Array(split( + by: RegexConsumer(separator), + maxSplits: maxSplits, + omittingEmptySubsequences: omittingEmptySubsequences)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index 2e955507b..16a3cb207 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -216,7 +216,6 @@ extension Collection where SubSequence == Self, Element: Equatable { } extension RangeReplaceableCollection where Element: Equatable { - @_disfavoredOverload /// Removes the initial elements that satisfy the given predicate from the /// start of the sequence. /// - Parameter predicate: A closure that takes an element of the sequence @@ -290,6 +289,7 @@ extension BidirectionalCollection where SubSequence == Substring { /// - Parameter prefix: The collection to remove from this collection. /// - Returns: A collection containing the elements that does not match /// `prefix` from the start. + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func trimmingPrefix(_ regex: R) -> SubSequence { _trimmingPrefix(RegexConsumer(regex)) @@ -311,6 +311,7 @@ extension RangeReplaceableCollection { /// Removes the initial elements that matches the given regex. /// - Parameter regex: The regex to remove from this collection. + @_disfavoredOverload @available(SwiftStdlib 5.7, *) public mutating func trimPrefix(_ regex: R) { _trimPrefix(RegexConsumer(regex)) diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index 64fe74fc6..1a5bc34df 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -32,10 +32,34 @@ func makeSingleUseSequence(element: T, count: Int) -> UnfoldSequence } } +struct CountedOptionSet: OptionSet { + static var arrayLiteralCreationCount = 0 + + var rawValue: Int + + static var one = Self(rawValue: 1) + static var two = Self(rawValue: 1) +} + +extension CountedOptionSet { + init(arrayLiteral: Self...) { + Self.arrayLiteralCreationCount += 1 + self.rawValue = 0 + for element in arrayLiteral { + self.insert(element) + } + } +} + class AlgorithmTests: XCTestCase { func testContains() { - XCTAssertTrue("".contains("")) - XCTAssertTrue("abcde".contains("")) + XCTAssertTrue("abcde".contains("a")) + XCTAssertTrue("abcde".contains("e" as Character)) + + XCTExpectFailure { + XCTAssertTrue("".contains("")) + XCTAssertTrue("abcde".contains("")) + } XCTAssertTrue("abcde".contains("abcd")) XCTAssertTrue("abcde".contains("bcde")) XCTAssertTrue("abcde".contains("bcd")) @@ -51,7 +75,36 @@ class AlgorithmTests: XCTestCase { } } - func testRanges() { + func testContainsSourceCompatibility() { + CountedOptionSet.arrayLiteralCreationCount = 0 + + let both: CountedOptionSet = [.one, .two] + let none: CountedOptionSet = [] + XCTAssertEqual(CountedOptionSet.arrayLiteralCreationCount, 2) + + let cosArray = [both, .one, .two] + XCTAssertFalse(cosArray.contains(none)) + + // This tests that `contains([])` uses the element-based `contains(_:)` + // method, interpreting `[]` as an instance of `CountedOptionSet`, rather + // than the collection-based overload, which would interpret `[]` as an + // `Array`. + XCTAssertFalse(cosArray.contains([])) + XCTAssertEqual(CountedOptionSet.arrayLiteralCreationCount, 3) + + // For these references to resolve to the `Element`-based stdlib function, + // the `String`- and `Substring`-based `contains` functions need to be + // marked as `@_disfavoredOverload`. However, that means that Foundation's + // `String.contains` get selected instead, which has inconsistent behavior. + + // Test that original `contains` functions are still accessible + let containsRef = "abcd".contains + XCTAssert(type(of: containsRef) == ((Character) -> Bool).self) + let containsParamsRef = "abcd".contains(_:) + XCTAssert(type(of: containsParamsRef) == ((Character) -> Bool).self) + } + + func testRegexRanges() { func expectRanges( _ string: String, _ regex: String, @@ -78,6 +131,7 @@ class AlgorithmTests: XCTestCase { expectRanges("", "x", []) expectRanges("", "x+", []) expectRanges("", "x*", [0..<0]) + expectRanges("aaa", "a*", [0..<3, 3..<3]) expectRanges("abc", "", [0..<0, 1..<1, 2..<2, 3..<3]) expectRanges("abc", "x", []) expectRanges("abc", "x+", []) @@ -92,8 +146,10 @@ class AlgorithmTests: XCTestCase { expectRanges("abc", "(a|b)*", [0..<2, 2..<2, 3..<3]) expectRanges("abc", "(b|c)+", [1..<3]) expectRanges("abc", "(b|c)*", [0..<0, 1..<3, 3..<3]) - - func expectStringRanges( + } + + func testStringRanges() { + func expectRanges( _ input: String, _ pattern: String, _ expected: [Range], @@ -110,16 +166,16 @@ class AlgorithmTests: XCTestCase { XCTAssertEqual(firstRange, expected.first, file: file, line: line) } - expectStringRanges("", "", [0..<0]) - expectStringRanges("abcde", "", [0..<0, 1..<1, 2..<2, 3..<3, 4..<4, 5..<5]) - expectStringRanges("abcde", "abcd", [0..<4]) - expectStringRanges("abcde", "bcde", [1..<5]) - expectStringRanges("abcde", "bcd", [1..<4]) - expectStringRanges("ababacabababa", "abababa", [6..<13]) - expectStringRanges("ababacabababa", "aba", [0..<3, 6..<9, 10..<13]) + expectRanges("", "", [0..<0]) + expectRanges("abcde", "", [0..<0, 1..<1, 2..<2, 3..<3, 4..<4, 5..<5]) + expectRanges("abcde", "abcd", [0..<4]) + expectRanges("abcde", "bcde", [1..<5]) + expectRanges("abcde", "bcd", [1..<4]) + expectRanges("ababacabababa", "abababa", [6..<13]) + expectRanges("ababacabababa", "aba", [0..<3, 6..<9, 10..<13]) } - func testSplit() { + func testRegexSplit() { func expectSplit( _ string: String, _ regex: String, @@ -138,9 +194,47 @@ class AlgorithmTests: XCTestCase { expectSplit("a", "a", ["", ""]) expectSplit("a____a____a", "_+", ["a", "a", "a"]) expectSplit("____a____a____a____", "_+", ["", "a", "a", "a", ""]) + } + + func testStringSplit() { + func expectSplit( + _ string: String, + _ separator: String, + _ expected: [Substring], + file: StaticString = #file, line: UInt = #line + ) { + let actual = Array(string.split(separator: separator, omittingEmptySubsequences: false)) + XCTAssertEqual(actual, expected, file: file, line: line) + } + + expectSplit("", "", [""]) + expectSplit("", "x", [""]) + expectSplit("a", "", ["", "a", ""]) + expectSplit("a", "x", ["a"]) + expectSplit("a", "a", ["", ""]) + expectSplit("a__a__a", "_", ["a", "", "a", "", "a"]) + expectSplit("_a_a_a_", "_", ["", "a", "a", "a", ""]) XCTAssertEqual("".split(separator: ""), []) XCTAssertEqual("".split(separator: "", omittingEmptySubsequences: false), [""]) + } + + func testSplitSourceCompatibility() { + CountedOptionSet.arrayLiteralCreationCount = 0 + + let both: CountedOptionSet = [.one, .two] + let none: CountedOptionSet = [] + XCTAssertEqual(CountedOptionSet.arrayLiteralCreationCount, 2) + + let cosArray = [both, .one, .two] + XCTAssertEqual(cosArray.split(separator: none).count, 1) + + // This tests that `contains([])` uses the element-based `contains(_:)` + // method, interpreting `[]` as an instance of `CountedOptionSet`, rather + // than the collection-based overload, which would interpret `[]` as an + // `Array`. + XCTAssertEqual(cosArray.split(separator: []).count, 1) + XCTAssertEqual(CountedOptionSet.arrayLiteralCreationCount, 3) // Test that original `split` functions are still accessible let splitRef = "abcd".split @@ -148,7 +242,7 @@ class AlgorithmTests: XCTestCase { let splitParamsRef = "abcd".split(separator:maxSplits:omittingEmptySubsequences:) XCTAssert(type(of: splitParamsRef) == ((Character, Int, Bool) -> [Substring]).self) } - + func testSplitPermutations() throws { let splitRegex = try Regex(#"\|"#) XCTAssertEqual( @@ -228,7 +322,7 @@ class AlgorithmTests: XCTestCase { } } - func testTrim() { + func testRegexTrim() { func expectTrim( _ string: String, _ regex: String, @@ -238,6 +332,10 @@ class AlgorithmTests: XCTestCase { let regex = try! Regex(regex) let actual = string.trimmingPrefix(regex) XCTAssertEqual(actual, expected, file: file, line: line) + + var actual2 = string + actual2.trimPrefix(regex) + XCTAssertEqual(actual2[...], expected, file: file, line: line) } expectTrim("", "", "") @@ -246,15 +344,54 @@ class AlgorithmTests: XCTestCase { expectTrim("a", "x", "a") expectTrim("___a", "_", "__a") expectTrim("___a", "_+", "a") - - XCTAssertEqual("".trimmingPrefix("a"), "") - XCTAssertEqual("a".trimmingPrefix("a"), "") - XCTAssertEqual("b".trimmingPrefix("a"), "b") - XCTAssertEqual("a".trimmingPrefix(""), "a") - XCTAssertEqual("___a".trimmingPrefix("_"), "__a") - XCTAssertEqual("___a".trimmingPrefix("___"), "a") - XCTAssertEqual("___a".trimmingPrefix("____"), "___a") - XCTAssertEqual("___a".trimmingPrefix("___a"), "") + } + + func testPredicateTrim() { + func expectTrim( + _ string: String, + _ predicate: (Character) -> Bool, + _ expected: Substring, + file: StaticString = #file, line: UInt = #line + ) { + let actual = string.trimmingPrefix(while: predicate) + XCTAssertEqual(actual, expected, file: file, line: line) + + var actual2 = string + actual2.trimPrefix(while: predicate) + XCTAssertEqual(actual2[...], expected, file: file, line: line) + } + + expectTrim("", \.isWhitespace, "") + expectTrim("a", \.isWhitespace, "a") + expectTrim(" ", \.isWhitespace, "") + expectTrim(" a", \.isWhitespace, "a") + expectTrim("a ", \.isWhitespace, "a ") + } + + func testStringTrim() { + func expectTrim( + _ string: String, + _ pattern: String, + _ expected: Substring, + file: StaticString = #file, line: UInt = #line + ) { + let actual = string.trimmingPrefix(pattern) + XCTAssertEqual(actual, expected, file: file, line: line) + + var actual2 = string + actual2.trimPrefix(pattern) + XCTAssertEqual(actual2[...], expected, file: file, line: line) + } + + expectTrim("", "", "") + expectTrim("", "x", "") + expectTrim("a", "", "a") + expectTrim("a", "x", "a") + expectTrim("a", "a", "") + expectTrim("___a", "_", "__a") + expectTrim("___a", "___", "a") + expectTrim("___a", "____", "___a") + expectTrim("___a", "___a", "") do { let prefix = makeSingleUseSequence(element: "_" as Character, count: 5) @@ -268,15 +405,9 @@ class AlgorithmTests: XCTestCase { // is just to test that it doesn't crash. XCTAssertNotEqual("_____a".trimmingPrefix(prefix), "") } - - XCTAssertEqual("".trimmingPrefix(while: \.isWhitespace), "") - XCTAssertEqual("a".trimmingPrefix(while: \.isWhitespace), "a") - XCTAssertEqual(" ".trimmingPrefix(while: \.isWhitespace), "") - XCTAssertEqual(" a".trimmingPrefix(while: \.isWhitespace), "a") - XCTAssertEqual("a ".trimmingPrefix(while: \.isWhitespace), "a ") } - func testReplace() { + func testRegexReplace() { func expectReplace( _ string: String, _ regex: String, @@ -300,8 +431,33 @@ class AlgorithmTests: XCTestCase { expectReplace("aab", "a", "X", "XXb") expectReplace("aab", "a+", "X", "Xb") expectReplace("aab", "a*", "X", "XXbX") + + // FIXME: Test maxReplacements + // FIXME: Test closure-based replacement } + func testStringReplace() { + func expectReplace( + _ string: String, + _ pattern: String, + _ replacement: String, + _ expected: String, + file: StaticString = #file, line: UInt = #line + ) { + let actual = string.replacing(pattern, with: replacement) + XCTAssertEqual(actual, expected, file: file, line: line) + } + + expectReplace("", "", "X", "X") + expectReplace("", "x", "X", "") + expectReplace("a", "", "X", "XaX") + expectReplace("a", "x", "X", "a") + expectReplace("a", "a", "X", "X") + expectReplace("aab", "a", "X", "XXb") + + // FIXME: Test maxReplacements + } + func testSubstring() throws { let s = "aaa | aaaaaa | aaaaaaaaaa" let s1 = s.dropFirst(6) // "aaaaaa | aaaaaaaaaa"