Skip to content

Commit 00c4021

Browse files
committed
Add wholeMatch and prefixMatch
Add the functions to the string processing algorithms proposal and implement the change. Move the functions from `String` and `Substring` extensions to `BidirectionalCollection`. Add tests for `firstMatch`, `wholeMatch`, and `prefixMatch` that use a custom `BidirectionalCollection` type.
1 parent a0ed7e1 commit 00c4021

File tree

4 files changed

+191
-27
lines changed

4 files changed

+191
-27
lines changed

Documentation/Evolution/StringProcessingAlgorithms.md

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,11 @@ We also propose the following regex-powered algorithms as well as their generic
162162
|`replace(:with:subrange:maxReplacements)`| Replaces all occurrences of the sequence matching the given `RegexComponent` or sequence with a given collection |
163163
|`split(by:)`| Returns the longest possible subsequences of the collection around elements equal to the given separator |
164164
|`firstMatch(of:)`| Returns the first match of the specified `RegexComponent` within the collection |
165+
|`wholeMatch(of:)`| Matches the specified `RegexComponent` in the collection as a whole |
166+
|`prefixMatch(of:)`| Matches the specified `RegexComponent` against the collection at the beginning |
165167
|`matches(of:)`| Returns a collection containing all matches of the specified `RegexComponent` |
166168

167169

168-
169170
## Detailed design
170171

171172
### `CustomMatchingRegexComponent`
@@ -389,7 +390,7 @@ extension BidirectionalCollection where SubSequence == Substring {
389390
}
390391
```
391392

392-
#### First match
393+
#### Match
393394

394395
```swift
395396
extension BidirectionalCollection where SubSequence == Substring {
@@ -398,6 +399,16 @@ extension BidirectionalCollection where SubSequence == Substring {
398399
/// - Returns: The first match of `regex` in the collection, or `nil` if
399400
/// there isn't a match.
400401
public func firstMatch<R: RegexComponent>(of regex: R) -> RegexMatch<R.Match>?
402+
403+
/// Match a regex in its entirety.
404+
/// - Parameter r: The regex to match against.
405+
/// - Returns: The match if there is one, or `nil` if none.
406+
public func wholeMatch<R: RegexComponent>(of r: R) -> Regex<R.Output>.Match?
407+
408+
/// Match a regex against the beginning of the collection; the match need not extend to the end.
409+
/// - Parameter r: The regex to match against.
410+
/// - Returns: The match if there is one, or `nil` if none.
411+
public func prefixMatch<R: RegexComponent>(of r: R) -> Regex<R.Output>.Match?
401412
}
402413
```
403414

@@ -473,7 +484,7 @@ extension RangeReplaceableCollection where SubSequence == Substring {
473484
/// - Returns: A new collection in which all occurrences of subsequence
474485
/// matching `regex` in `subrange` are replaced by `replacement`.
475486
public func replacing<R: RegexComponent, Replacement: Collection>(
476-
_ regex: R,
487+
_ r: R,
477488
with replacement: Replacement,
478489
subrange: Range<Index>,
479490
maxReplacements: Int = .max
@@ -489,7 +500,7 @@ extension RangeReplaceableCollection where SubSequence == Substring {
489500
/// - Returns: A new collection in which all occurrences of subsequence
490501
/// matching `regex` are replaced by `replacement`.
491502
public func replacing<R: RegexComponent, Replacement: Collection>(
492-
_ regex: R,
503+
_ r: R,
493504
with replacement: Replacement,
494505
maxReplacements: Int = .max
495506
) -> Self where Replacement.Element == Element
@@ -502,7 +513,7 @@ extension RangeReplaceableCollection where SubSequence == Substring {
502513
/// - maxReplacements: A number specifying how many occurrences of the
503514
/// sequence matching `regex` to replace. Default is `Int.max`.
504515
public mutating func replace<R: RegexComponent, Replacement: Collection>(
505-
_ regex: R,
516+
_ r: R,
506517
with replacement: Replacement,
507518
maxReplacements: Int = .max
508519
) where Replacement.Element == Element
@@ -519,7 +530,7 @@ extension RangeReplaceableCollection where SubSequence == Substring {
519530
/// - Returns: A new collection in which all occurrences of subsequence
520531
/// matching `regex` are replaced by `replacement`.
521532
public func replacing<R: RegexComponent, Replacement: Collection>(
522-
_ regex: R,
533+
_ r: R,
523534
with replacement: (RegexMatch<R.Match>) throws -> Replacement,
524535
subrange: Range<Index>,
525536
maxReplacements: Int = .max
@@ -536,7 +547,7 @@ extension RangeReplaceableCollection where SubSequence == Substring {
536547
/// - Returns: A new collection in which all occurrences of subsequence
537548
/// matching `regex` are replaced by `replacement`.
538549
public func replacing<R: RegexComponent, Replacement: Collection>(
539-
_ regex: R,
550+
_ r: R,
540551
with replacement: (RegexMatch<R.Match>) throws -> Replacement,
541552
maxReplacements: Int = .max
542553
) rethrows -> Self where Replacement.Element == Element
@@ -550,7 +561,7 @@ extension RangeReplaceableCollection where SubSequence == Substring {
550561
/// - maxReplacements: A number specifying how many occurrences of the
551562
/// sequence matching `regex` to replace. Default is `Int.max`.
552563
public mutating func replace<R: RegexComponent, Replacement: Collection>(
553-
_ regex: R,
564+
_ r: R,
554565
with replacement: (RegexMatch<R.Match>) throws -> Replacement,
555566
maxReplacements: Int = .max
556567
) rethrows where Replacement.Element == Element
@@ -609,4 +620,4 @@ Trimming a string from both sides shares a similar story. For example, `"ababa".
609620

610621
### Future API
611622

612-
Some Python functions are not currently included in this proposal, such as trimming the suffix from a string/collection. This pitch aims to establish a pattern for using `RegexComponent` with string processing algorithms, so that further enhancement can to be introduced to the standard library easily in the future, and eventually close the gap between Swift and other popular scripting languages.
623+
Some common string processing functions are not currently included in this proposal, such as trimming the suffix from a string/collection and finding overlapping ranges of matched substrings. This pitch aims to establish a pattern for using `RegexComponent` with string processing algorithms, so that further enhancements can be introduced to the standard library easily in the future, and eventually close the gap between Swift and other popular scripting languages.

Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ extension BidirectionalCollection {
3939

4040
extension BidirectionalCollection where SubSequence == Substring {
4141
@available(SwiftStdlib 5.7, *)
42+
@_disfavoredOverload
4243
func firstMatch<R: RegexComponent>(
4344
of regex: R
4445
) -> _MatchResult<RegexConsumer<R, Self>>? {

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -156,31 +156,22 @@ extension Regex {
156156
}
157157

158158
@available(SwiftStdlib 5.7, *)
159-
extension String {
159+
extension BidirectionalCollection where SubSequence == Substring {
160+
/// Match a regex in its entirety.
161+
/// - Parameter r: The regex to match against.
162+
/// - Returns: The match if there is one, or `nil` if none.
160163
public func wholeMatch<R: RegexComponent>(
161164
of r: R
162165
) -> Regex<R.RegexOutput>.Match? {
163-
try? r.regex.wholeMatch(in: self)
166+
// Match against the collection's own full-range slice. Taking `.base` of the
// slice would widen a `Substring` receiver to its entire underlying `String`,
// so the match would be attempted outside the receiver's bounds.
try? r.regex.wholeMatch(in: self[...])
164167
}
165168

169+
/// Match a regex against the beginning of the collection; the match need not extend to the end.
170+
/// - Parameter r: The regex to match against.
171+
/// - Returns: The match if there is one, or `nil` if none.
166172
public func prefixMatch<R: RegexComponent>(
167173
of r: R
168174
) -> Regex<R.RegexOutput>.Match? {
169-
try? r.regex.prefixMatch(in: self)
170-
}
171-
}
172-
173-
@available(SwiftStdlib 5.7, *)
174-
extension Substring {
175-
public func wholeMatch<R: RegexComponent>(
176-
of r: R
177-
) -> Regex<R.RegexOutput>.Match? {
178-
try? r.regex.wholeMatch(in: self)
179-
}
180-
181-
public func prefixMatch<R: RegexComponent>(
182-
of r: R
183-
) -> Regex<R.RegexOutput>.Match? {
184-
try? r.regex.prefixMatch(in: self)
175+
try? r.regex.prefixMatch(in: self[...])
185176
}
186177
}

Tests/RegexBuilderTests/CustomTests.swift

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,51 @@ func customTest<Match: Equatable>(
7070
}
7171
}
7272

73+
// Test support
74+
struct Concat : Equatable {
75+
var wrapped: String
76+
init(_ name: String, _ suffix: Int?) {
77+
if let suffix = suffix {
78+
wrapped = name + String(suffix)
79+
} else {
80+
wrapped = name
81+
}
82+
}
83+
}
84+
85+
extension Concat : Collection {
86+
typealias Index = String.Index
87+
typealias Element = String.Element
88+
89+
var startIndex: Index { return wrapped.startIndex }
90+
var endIndex: Index { return wrapped.endIndex }
91+
92+
subscript(position: Index) -> Element {
93+
return wrapped[position]
94+
}
95+
96+
func index(after i: Index) -> Index {
97+
return wrapped.index(after: i)
98+
}
99+
}
100+
101+
extension Concat: BidirectionalCollection {
102+
typealias Indices = String.Indices
103+
typealias SubSequence = String.SubSequence
104+
105+
func index(before i: Index) -> Index {
106+
return wrapped.index(before: i)
107+
}
108+
109+
var indices: Indices {
110+
wrapped.indices
111+
}
112+
113+
subscript(bounds: Range<Index>) -> Substring {
114+
Substring(wrapped[bounds])
115+
}
116+
}
117+
73118
class CustomRegexComponentTests: XCTestCase {
74119
// TODO: Refactor below into more exhaustive, declarative
75120
// tests.
@@ -223,4 +268,120 @@ class CustomRegexComponentTests: XCTestCase {
223268

224269

225270
}
271+
272+
273+
func testMatchVarients() {
274+
func customTest<Match: Equatable>(
275+
_ regex: Regex<Match>,
276+
_ input: Concat,
277+
expected: (wholeMatch: Match?, firstMatch: Match?, prefixMatch: Match?),
278+
file: StaticString = #file, line: UInt = #line
279+
) {
280+
let wholeResult = input.wholeMatch(of: regex)?.output
281+
let firstResult = input.firstMatch(of: regex)?.output
282+
let prefixResult = input.prefixMatch(of: regex)?.output
283+
XCTAssertEqual(wholeResult, expected.wholeMatch, file: file, line: line)
284+
XCTAssertEqual(firstResult, expected.firstMatch, file: file, line: line)
285+
XCTAssertEqual(prefixResult, expected.prefixMatch, file: file, line: line)
286+
}
287+
288+
typealias CaptureMatch1 = (Substring, Int?)
289+
func customTest(
290+
_ regex: Regex<CaptureMatch1>,
291+
_ input: Concat,
292+
expected: (wholeMatch: CaptureMatch1?, firstMatch: CaptureMatch1?, prefixMatch: CaptureMatch1?),
293+
file: StaticString = #file, line: UInt = #line
294+
) {
295+
let wholeResult = input.wholeMatch(of: regex)?.output
296+
let firstResult = input.firstMatch(of: regex)?.output
297+
let prefixResult = input.prefixMatch(of: regex)?.output
298+
XCTAssertEqual(wholeResult?.0, expected.wholeMatch?.0, file: file, line: line)
299+
XCTAssertEqual(wholeResult?.1, expected.wholeMatch?.1, file: file, line: line)
300+
301+
XCTAssertEqual(firstResult?.0, expected.firstMatch?.0, file: file, line: line)
302+
XCTAssertEqual(firstResult?.1, expected.firstMatch?.1, file: file, line: line)
303+
304+
XCTAssertEqual(prefixResult?.0, expected.prefixMatch?.0, file: file, line: line)
305+
XCTAssertEqual(prefixResult?.1, expected.prefixMatch?.1, file: file, line: line)
306+
}
307+
308+
var regex = Regex {
309+
OneOrMore(.digit)
310+
}
311+
312+
customTest(regex, Concat("amy", 2023), expected:(nil, "2023", nil)) // amy2023
313+
customTest(regex, Concat("amy2023", nil), expected:(nil, "2023", nil))
314+
customTest(regex, Concat("amy", nil), expected:(nil, nil, nil))
315+
customTest(regex, Concat("", 2023), expected:("2023", "2023", "2023")) // 2023
316+
customTest(regex, Concat("bob012b", 2023), expected:(nil, "012", nil)) // bob012b2023
317+
customTest(regex, Concat("bob012b", nil), expected:(nil, "012", nil))
318+
customTest(regex, Concat("007bob", 2023), expected:(nil, "007", "007"))
319+
customTest(regex, Concat("", nil), expected:(nil, nil, nil))
320+
321+
regex = Regex {
322+
OneOrMore(CharacterClass("a"..."z"))
323+
}
324+
325+
customTest(regex, Concat("amy", 2023), expected:(nil, "amy", "amy")) // amy2023
326+
customTest(regex, Concat("amy", nil), expected:("amy", "amy", "amy"))
327+
customTest(regex, Concat("amy2022-bob", 2023), expected:(nil, "amy", "amy")) // amy2022-bob2023
328+
customTest(regex, Concat("", 2023), expected:(nil, nil, nil)) // 2023
329+
customTest(regex, Concat("bob012b", 2023), expected:(nil, "bob", "bob")) // bob012b2023
330+
customTest(regex, Concat("bob012b", nil), expected:(nil, "bob", "bob"))
331+
customTest(regex, Concat("007bob", 2023), expected:(nil, "bob", nil))
332+
customTest(regex, Concat("", nil), expected:(nil, nil, nil))
333+
334+
regex = Regex {
335+
OneOrMore {
336+
CharacterClass("A"..."Z")
337+
OneOrMore(CharacterClass("a"..."z"))
338+
Repeat(.digit, count: 2)
339+
}
340+
}
341+
342+
customTest(regex, Concat("Amy12345", nil), expected:(nil, "Amy12", "Amy12"))
343+
customTest(regex, Concat("Amy", 2023), expected:(nil, "Amy20", "Amy20"))
344+
customTest(regex, Concat("Amy", 23), expected:("Amy23", "Amy23", "Amy23"))
345+
customTest(regex, Concat("", 2023), expected:(nil, nil, nil)) // 2023
346+
customTest(regex, Concat("Amy23 Boba17", nil), expected:(nil, "Amy23", "Amy23"))
347+
customTest(regex, Concat("amy23 Boba17", nil), expected:(nil, "Boba17", nil))
348+
customTest(regex, Concat("Amy23 boba17", nil), expected:(nil, "Amy23", "Amy23"))
349+
customTest(regex, Concat("amy23 Boba", 17), expected:(nil, "Boba17", nil))
350+
customTest(regex, Concat("Amy23Boba17", nil), expected:("Amy23Boba17", "Amy23Boba17", "Amy23Boba17"))
351+
customTest(regex, Concat("Amy23Boba", 17), expected:("Amy23Boba17", "Amy23Boba17", "Amy23Boba17"))
352+
customTest(regex, Concat("23 Boba", 17), expected:(nil, "Boba17", nil))
353+
354+
let twoDigitRegex = Regex {
355+
OneOrMore {
356+
CharacterClass("A"..."Z")
357+
OneOrMore(CharacterClass("a"..."z"))
358+
Capture(Repeat(.digit, count: 2)) { Int($0) }
359+
}
360+
}
361+
362+
customTest(twoDigitRegex, Concat("Amy12345", nil), expected: (nil, ("Amy12", 12), ("Amy12", 12)))
363+
customTest(twoDigitRegex, Concat("Amy", 12345), expected: (nil, ("Amy12", 12), ("Amy12", 12)))
364+
customTest(twoDigitRegex, Concat("Amy", 12), expected: (("Amy12", 12), ("Amy12", 12), ("Amy12", 12)))
365+
customTest(twoDigitRegex, Concat("Amy23 Boba", 17), expected: (nil, firstMatch: ("Amy23", 23), prefixMatch: ("Amy23", 23)))
366+
customTest(twoDigitRegex, Concat("amy23 Boba20", 23), expected:(nil, ("Boba20", 20), nil))
367+
customTest(twoDigitRegex, Concat("Amy23Boba17", nil), expected:(("Amy23Boba17", 17), ("Amy23Boba17", 17), ("Amy23Boba17", 17)))
368+
customTest(twoDigitRegex, Concat("Amy23Boba", 17), expected:(("Amy23Boba17", 17), ("Amy23Boba17", 17), ("Amy23Boba17", 17)))
369+
370+
let millennium = Regex {
371+
CharacterClass("A"..."Z")
372+
OneOrMore(CharacterClass("a"..."z"))
373+
Capture { Repeat(.digit, count: 4) } transform: { v -> Int? in
374+
guard let year = Int(v) else { return nil }
375+
return year > 2000 ? year : nil
376+
}
377+
}
378+
379+
customTest(millennium, Concat("Amy2025", nil), expected: (("Amy2025", 2025), ("Amy2025", 2025), ("Amy2025", 2025)))
380+
customTest(millennium, Concat("Amy", 2025), expected: (("Amy2025", 2025), ("Amy2025", 2025), ("Amy2025", 2025)))
381+
customTest(millennium, Concat("Amy1995", nil), expected: (("Amy1995", nil), ("Amy1995", nil), ("Amy1995", nil)))
382+
customTest(millennium, Concat("Amy", 1995), expected: (("Amy1995", nil), ("Amy1995", nil), ("Amy1995", nil)))
383+
customTest(millennium, Concat("amy2025", nil), expected: (nil, nil, nil))
384+
customTest(millennium, Concat("amy", 2025), expected: (nil, nil, nil))
385+
}
226386
}
387+

0 commit comments

Comments
 (0)