From 0552f74690abd65919d69ef7db87b4f1f141c271 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Mon, 18 Apr 2022 14:13:30 -0500
Subject: [PATCH 1/3] Add word boundary kind type

---
 Sources/_StringProcessing/Regex/Options.swift | 43 ++++++++++++++++---
 Tests/RegexBuilderTests/RegexDSLTests.swift   | 24 +++++++++++
 2 files changed, 60 insertions(+), 7 deletions(-)
diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift
index d474caae3..d4f76e611 100644
--- a/Sources/_StringProcessing/Regex/Options.swift
+++ b/Sources/_StringProcessing/Regex/Options.swift
@@ -41,13 +41,9 @@ extension RegexComponent {
     wrapInOption(.asciiOnlyPOSIXProps, addingIf: useASCII)
   }
   
-  /// Returns a regular expression that uses the Unicode word boundary
-  /// algorithm.
-  ///
-  /// This option is enabled by default; pass `false` to disable use of
-  /// Unicode's word boundary algorithm.
-  public func usingUnicodeWordBoundaries(_ useUnicodeWordBoundaries: Bool = true) -> Regex<RegexOutput> {
-    wrapInOption(.unicodeWordBoundaries, addingIf: useUnicodeWordBoundaries)
+  /// Returns a regular expression that uses the specified word boundary algorithm.
+  public func identifyingWordBoundaries(with wordBoundaryKind: RegexWordBoundaryKind) -> Regex<RegexOutput> {
+    wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .unicodeLevel2)
   }
   
   /// Returns a regular expression where the start and end of input
@@ -107,6 +103,7 @@ extension RegexComponent {
 }
 
 @available(SwiftStdlib 5.7, *)
+/// A semantic level to use during regex matching.
 public struct RegexSemanticLevel: Hashable {
   internal enum Representation {
     case graphemeCluster
@@ -128,6 +125,38 @@ public struct RegexSemanticLevel: Hashable {
   }
 }
 
+@available(SwiftStdlib 5.7, *)
+/// A word boundary algorithm to use during regex matching.
+public struct RegexWordBoundaryKind: Hashable {
+  internal enum Representation {
+    case unicodeLevel1
+    case unicodeLevel2
+  }
+  
+  internal var base: Representation
+
+  /// A word boundary algorithm that implements the "simple word boundary"
+  /// Unicode recommendation.
+  ///
+  /// A simple word boundary is a position in the input between two characters
+  /// that match `/\w\W/` or `/\W\w/`, or between the start or end of the input
+  /// and a `\w` character. Word boundaries therefore depend on the
+  /// option-defined behavior of `\w`.
+  public static var unicodeLevel1: Self {
+    .init(base: .unicodeLevel1)
+  }
+
+  /// A word boundary algorithm that implements the "default word boundary"
+  /// Unicode recommendation.
+  ///
+  /// Default word boundaries use a Unicode algorithm that handles some cases
+  /// better than simple word boundaries, such as words with internal
+  /// punctuation, changes in script, and Emoji.
+  public static var unicodeLevel2: Self {
+    .init(base: .unicodeLevel2)
+  }
+}
+
 // Options that only affect literals
 @available(SwiftStdlib 5.7, *)
 extension RegexComponent {
diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift
index 8159ba8ae..bed4659bc 100644
--- a/Tests/RegexBuilderTests/RegexDSLTests.swift
+++ b/Tests/RegexBuilderTests/RegexDSLTests.swift
@@ -262,6 +262,30 @@ class RegexDSLTests: XCTestCase {
         }
         .ignoringCase(false)
       }
+    
+#if os(macOS)
+    try XCTExpectFailure("Implement level 2 word boundaries") {
+      try _testDSLCaptures(
+        ("can't stop won't stop", ("can't stop won't stop", "can't", "won")),
+        matchType: (Substring, Substring, Substring).self, ==) {
+          Capture {
+            OneOrMore(.word)
+            Anchor.wordBoundary
+          }
+          OneOrMore(.any, .reluctantly)
+          "stop"
+          " "
+          
+          Capture {
+            OneOrMore(.word)
+            Anchor.wordBoundary
+          }
+          .identifyingWordBoundaries(with: .unicodeLevel1)
+          OneOrMore(.any, .reluctantly)
+          "stop"
+        }
+    }
+#endif
   }
   
   func testQuantificationBehavior() throws {

From 1d68bc84f68945c999a6bc0559f4f4b0a7c81418 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Mon, 18 Apr 2022 15:11:12 -0500
Subject: [PATCH 2/3] Skip XFAIL'd test on Linux

---
 Tests/RegexBuilderTests/RegexDSLTests.swift | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift
index 81d258fd0..98b309a5b 100644
--- a/Tests/RegexBuilderTests/RegexDSLTests.swift
+++ b/Tests/RegexBuilderTests/RegexDSLTests.swift
@@ -263,6 +263,7 @@ class RegexDSLTests: XCTestCase {
         .ignoringCase(false)
       }
     
+#if os(macOS)
     try XCTExpectFailure("Implement level 2 word boundaries") {
       try _testDSLCaptures(
         ("can't stop won't stop", ("can't stop won't stop", "can't", "won")),
@@ -284,7 +285,8 @@ class RegexDSLTests: XCTestCase {
           "stop"
         }
     }
-
+#endif
+    
     try _testDSLCaptures(
       ("abcdef123", ("abcdef123", "a", "123")),
       matchType: (Substring, Substring, Substring).self, ==) {

From b63e2ed6994afe845dafcdcb6a636737b951518f Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Mon, 18 Apr 2022 15:28:04 -0500
Subject: [PATCH 3/3] Nominalize all matching option modifiers

---
 Sources/_StringProcessing/Regex/Options.swift | 16 ++++++-------
 Tests/RegexBuilderTests/RegexDSLTests.swift   | 24 ++++++++++++++-----
 Tests/RegexTests/MatchTests.swift             |  2 +-
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift
index b4fc43b1a..623589b54 100644
--- a/Sources/_StringProcessing/Regex/Options.swift
+++ b/Sources/_StringProcessing/Regex/Options.swift
@@ -14,35 +14,35 @@
 @available(SwiftStdlib 5.7, *)
 extension RegexComponent {
   /// Returns a regular expression that ignores casing when matching.
-  public func ignoringCase(_ ignoreCase: Bool = true) -> Regex<RegexOutput> {
-    wrapInOption(.caseInsensitive, addingIf: ignoreCase)
+  public func ignoresCase(_ ignoresCase: Bool = true) -> Regex<RegexOutput> {
+    wrapInOption(.caseInsensitive, addingIf: ignoresCase)
   }
 
   /// Returns a regular expression that only matches ASCII characters as "word
   /// characters".
-  public func usingASCIIWordCharacters(_ useASCII: Bool = true) -> Regex<RegexOutput> {
-    wrapInOption(.asciiOnlyDigit, addingIf: useASCII)
+  public func asciiOnlyWordCharacters(_ useASCII: Bool = true) -> Regex<RegexOutput> {
+    wrapInOption(.asciiOnlyWord, addingIf: useASCII)
   }
 
   /// Returns a regular expression that only matches ASCII characters as digits.
-  public func usingASCIIDigits(_ useASCII: Bool = true) -> Regex<RegexOutput> {
+  public func asciiOnlyDigits(_ useASCII: Bool = true) -> Regex<RegexOutput> {
     wrapInOption(.asciiOnlyDigit, addingIf: useASCII)
   }
 
   /// Returns a regular expression that only matches ASCII characters as space
   /// characters.
-  public func usingASCIISpaces(_ useASCII: Bool = true) -> Regex<RegexOutput> {
+  public func asciiOnlyWhitespace(_ useASCII: Bool = true) -> Regex<RegexOutput> {
     wrapInOption(.asciiOnlySpace, addingIf: useASCII)
   }
 
   /// Returns a regular expression that only matches ASCII characters when
   /// matching character classes.
-  public func usingASCIICharacterClasses(_ useASCII: Bool = true) -> Regex<RegexOutput> {
+  public func asciiOnlyCharacterClasses(_ useASCII: Bool = true) -> Regex<RegexOutput> {
     wrapInOption(.asciiOnlyPOSIXProps, addingIf: useASCII)
   }
   
   /// Returns a regular expression that uses the specified word boundary algorithm.
-  public func identifyingWordBoundaries(with wordBoundaryKind: RegexWordBoundaryKind) -> Regex<RegexOutput> {
+  public func wordBoundaryKind(_ wordBoundaryKind: RegexWordBoundaryKind) -> Regex<RegexOutput> {
     wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .unicodeLevel2)
   }
   
diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift
index 98b309a5b..897bca8f7 100644
--- a/Tests/RegexBuilderTests/RegexDSLTests.swift
+++ b/Tests/RegexBuilderTests/RegexDSLTests.swift
@@ -228,7 +228,7 @@ class RegexDSLTests: XCTestCase {
       matchType: Substring.self, ==) {
         OneOrMore {
           "abc"
-        }.ignoringCase(true)
+        }.ignoresCase(true)
       }
     
     // Multiple options on one component wrap successively, but do not
@@ -242,8 +242,8 @@ class RegexDSLTests: XCTestCase {
         OneOrMore {
           "abc"
         }
-        .ignoringCase(true)
-        .ignoringCase(false)
+        .ignoresCase(true)
+        .ignoresCase(false)
       }
 
     // An option on an outer component doesn't override an option set on an
@@ -257,10 +257,10 @@ class RegexDSLTests: XCTestCase {
       ("abcdeABCdeaBcde", "abcdeABCdeaBcde"),
       matchType: Substring.self, ==) {
         OneOrMore {
-          "abc".ignoringCase(true)
+          "abc".ignoresCase(true)
           Optionally("de")
         }
-        .ignoringCase(false)
+        .ignoresCase(false)
       }
     
 #if os(macOS)
@@ -280,7 +280,7 @@ class RegexDSLTests: XCTestCase {
             OneOrMore(.word)
             Anchor.wordBoundary
           }
-          .identifyingWordBoundaries(with: .unicodeLevel1)
+          .wordBoundaryKind(.unicodeLevel1)
           OneOrMore(.any, .reluctantly)
           "stop"
         }
@@ -304,6 +304,18 @@ class RegexDSLTests: XCTestCase {
         }
         ZeroOrMore(.digit)
       }
+    
+    try _testDSLCaptures(
+      ("abcdefg", ("abcdefg", "abcdefg")),
+      ("abcdéfg", ("abcdéfg", "abcd")),
+      matchType: (Substring, Substring).self, ==) {
+        Capture {
+          OneOrMore(.word)
+        }
+        .asciiOnlyWordCharacters()
+        
+        ZeroOrMore(.any)
+      }
   }
   
   func testQuantificationBehavior() throws {
diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index 8e92c5936..4d9ed4d01 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -1337,7 +1337,7 @@ extension RegexTests {
     XCTAssertTrue ("cafe".contains(regex))
     XCTAssertFalse("CaFe".contains(regex))
     
-    let caseInsensitiveRegex = regex.ignoringCase()
+    let caseInsensitiveRegex = regex.ignoresCase()
     XCTAssertTrue("cafe".contains(caseInsensitiveRegex))
     XCTAssertTrue("CaFe".contains(caseInsensitiveRegex))
   }