diff --git a/Sources/RegexBuilder/Anchor.swift b/Sources/RegexBuilder/Anchor.swift index 31a3e8a0d..28fc7e8d1 100644 --- a/Sources/RegexBuilder/Anchor.swift +++ b/Sources/RegexBuilder/Anchor.swift @@ -104,6 +104,12 @@ extension Anchor { /// /// This anchor is equivalent to `^` in regex syntax when the `m` option /// has been enabled or `anchorsMatchLineEndings(true)` has been called. + /// + /// For example, the following regexes are all equivalent: + /// + /// - `Regex { Anchor.startOfLine }` + /// - `/(?m)^/` or `/(?m:^)/` + /// - `/^/.anchorsMatchLineEndings(true)` public static var startOfLine: Anchor { Anchor(kind: .startOfLine) } @@ -113,6 +119,12 @@ extension Anchor { /// /// This anchor is equivalent to `$` in regex syntax when the `m` option /// has been enabled or `anchorsMatchLineEndings(true)` has been called. + /// + /// For example, the following regexes are all equivalent: + /// + /// - `Regex { Anchor.endOfLine }` + /// - `/(?m)$/` or `/(?m:$)/` + /// - `/$/.anchorsMatchLineEndings(true)` public static var endOfLine: Anchor { Anchor(kind: .endOfLine) } diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift index 24d5c422e..88d2dbf5d 100644 --- a/Sources/_StringProcessing/Regex/Options.swift +++ b/Sources/_StringProcessing/Regex/Options.swift @@ -12,7 +12,7 @@ @_implementationOnly import _RegexParser @available(SwiftStdlib 5.7, *) -extension RegexComponent { +extension Regex { /// Returns a regular expression that ignores case when matching. /// /// - Parameter ignoresCase: A Boolean value indicating whether to ignore case. @@ -65,7 +65,7 @@ extension RegexComponent { /// - Parameter wordBoundaryKind: The algorithm to use for determining word boundaries. /// - Returns: The modified regular expression. public func wordBoundaryKind(_ wordBoundaryKind: RegexWordBoundaryKind) -> Regex { - wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .unicodeLevel2) + wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .default) } /// Returns a regular expression where the start and end of input @@ -83,8 +83,8 @@ extension RegexComponent { /// /// This method corresponds to applying the `m` option in regex syntax. For /// this behavior in the `RegexBuilder` syntax, see - /// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfInput``, - /// and ``Anchor.endOfInput``. + /// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfSubject``, + /// and ``Anchor.endOfSubject``. /// /// - Parameter matchLineEndings: A Boolean value indicating whether `^` and /// `$` should match the start and end of lines, respectively. @@ -205,7 +205,7 @@ public struct RegexWordBoundaryKind: Hashable { /// that match `/\w\W/` or `/\W\w/`, or between the start or end of the input /// and a `\w` character. Word boundaries therefore depend on the option- /// defined behavior of `\w`. - public static var unicodeLevel1: Self { + public static var simple: Self { .init(base: .unicodeLevel1) } @@ -215,7 +215,7 @@ public struct RegexWordBoundaryKind: Hashable { /// Default word boundaries use a Unicode algorithm that handles some cases /// better than simple word boundaries, such as words with internal /// punctuation, changes in script, and Emoji. - public static var unicodeLevel2: Self { + public static var `default`: Self { .init(base: .unicodeLevel2) } } diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index b67c6c242..47d2ebf02 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -234,8 +234,10 @@ class RegexDSLTests: XCTestCase { ("abcabc", "abcabc"), ("abcABCaBc", "abcABCaBc"), matchType: Substring.self, ==) { - OneOrMore { - "abc" + Regex { + OneOrMore { + "abc" + } }.ignoresCase(true) } @@ -247,8 +249,10 @@ class RegexDSLTests: XCTestCase { ("abcabc", "abcabc"), ("abcABCaBc", "abcABCaBc"), matchType: Substring.self, ==) { - OneOrMore { - "abc" + Regex { + OneOrMore { + "abc" + } } .ignoresCase(true) .ignoresCase(false) @@ -264,9 +268,13 @@ class RegexDSLTests: XCTestCase { ("abcabc", "abcabc"), ("abcdeABCdeaBcde", "abcdeABCdeaBcde"), matchType: Substring.self, ==) { - OneOrMore { - "abc".ignoresCase(true) - Optionally("de") + Regex { + OneOrMore { + Regex { + "abc" + }.ignoresCase(true) + Optionally("de") + } } .ignoresCase(false) } @@ -303,11 +311,13 @@ class RegexDSLTests: XCTestCase { "stop" " " - Capture { - OneOrMore(.word) - Anchor.wordBoundary - } - .wordBoundaryKind(.unicodeLevel1) + Regex { + Capture { + OneOrMore(.word) + Anchor.wordBoundary + } + }.wordBoundaryKind(.simple) + OneOrMore(.any, .reluctant) "stop" } @@ -317,15 +327,17 @@ class RegexDSLTests: XCTestCase { matchType: (Substring, Substring, Substring).self, ==) { Capture { // Reluctant behavior due to option - OneOrMore(.anyOf("abcd")) - .repetitionBehavior(.reluctant) + Regex { + OneOrMore(.anyOf("abcd")) + }.repetitionBehavior(.reluctant) } ZeroOrMore("a"..."z") Capture { // Eager behavior due to explicit parameter, despite option - OneOrMore(.digit, .eager) - .repetitionBehavior(.reluctant) + Regex { + OneOrMore(.digit, .eager) + }.repetitionBehavior(.reluctant) } ZeroOrMore(.digit) } @@ -334,10 +346,11 @@ class RegexDSLTests: XCTestCase { ("abcdefg", ("abcdefg", "abcdefg")), ("abcdéfg", ("abcdéfg", "abcd")), matchType: (Substring, Substring).self, ==) { - Capture { - OneOrMore(.word) - } - .asciiOnlyWordCharacters() + Regex { + Capture { + OneOrMore(.word) + } + }.asciiOnlyWordCharacters() ZeroOrMore(.any) } @@ -368,8 +381,10 @@ class RegexDSLTests: XCTestCase { ("abc1def2", ("abc1def2", "1")), matchType: (Substring, Substring).self, ==) { - OneOrMore(.reluctant) { - One(.word) + Regex { + OneOrMore(.reluctant) { + One(.word) + } }.repetitionBehavior(.possessive) Capture(.digit) ZeroOrMore(.any) @@ -421,8 +436,9 @@ class RegexDSLTests: XCTestCase { { Regex { Capture { - OneOrMore("a") - .repetitionBehavior(.eager) + Regex { + OneOrMore("a") + }.repetitionBehavior(.eager) } OneOrMore("a") }.repetitionBehavior(.possessive) @@ -434,8 +450,9 @@ class RegexDSLTests: XCTestCase { { Regex { Capture { - OneOrMore("a") - .repetitionBehavior(.reluctant) + Regex { + OneOrMore("a") + }.repetitionBehavior(.reluctant) } OneOrMore("a") }.repetitionBehavior(.possessive) diff --git a/Tests/RegexTests/UTS18Tests.swift b/Tests/RegexTests/UTS18Tests.swift index fa8a1729d..aa3639ea6 100644 --- a/Tests/RegexTests/UTS18Tests.swift +++ b/Tests/RegexTests/UTS18Tests.swift @@ -222,7 +222,7 @@ extension UTS18Tests { // - Nonspacing marks are never divided from their base characters, and // otherwise ignored in locating boundaries. func testSimpleWordBoundaries() { - let simpleWordRegex = regex(#".+?\b"#).wordBoundaryKind(.unicodeLevel1) + let simpleWordRegex = regex(#".+?\b"#).wordBoundaryKind(.simple) expectFirstMatch(input, simpleWordRegex, input[pos: ..<11]) expectFirstMatch("don't", simpleWordRegex, "don") expectFirstMatch("Cafe\u{301}", simpleWordRegex, "Café")