From 79e4c65f6a8279fdd62a32131745f368791c076c Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Fri, 17 Jun 2022 14:53:58 -0500
Subject: [PATCH 01/18] Disentangle disparate 'bounds' ideas in processor

This starts the separation of two different ideas for boundaries in
the base input:

- subjectBounds: These represent the actual subject in the input
  string. For a `String` callee, this will cover the entire bounds,
  while for a `Substring` these will represent the bounds of the
  substring in the base.
- searchBounds: These represent the current search range in the
  subject. These bounds can be the same as `subjectBounds` or a
  subrange when searching for subsequent matches or replacing only
  in a subrange of a string.
---
 Sources/_StringProcessing/ByteCodeGen.swift   | 27 +++++----
 .../_StringProcessing/Engine/Consume.swift    | 17 +++++-
 .../_StringProcessing/Engine/Processor.swift  | 59 +++++++++++++------
 Sources/_StringProcessing/Executor.swift      | 10 ++--
 Sources/_StringProcessing/Regex/Match.swift   | 57 +++++++++++++-----
 .../_CharacterClassModel.swift                |  6 +-
 6 files changed, 123 insertions(+), 53 deletions(-)

diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
index 8407f68ac..81a5183e7 100644
--- a/Sources/_StringProcessing/ByteCodeGen.swift
+++ b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -97,25 +97,25 @@ fileprivate extension Compiler.ByteCodeGen {
     switch kind {
     case .startOfSubject:
       builder.buildAssert { (input, pos, bounds) in
-        pos == input.startIndex
+        pos == bounds.lowerBound
       }
 
     case .endOfSubjectBeforeNewline:
       builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-        if pos == input.endIndex { return true }
+        if pos == bounds.upperBound { return true }
         switch semanticLevel {
         case .graphemeCluster:
-          return input.index(after: pos) == input.endIndex
+          return input.index(after: pos) == bounds.upperBound
            && input[pos].isNewline
         case .unicodeScalar:
-          return input.unicodeScalars.index(after: pos) == input.endIndex
+          return input.unicodeScalars.index(after: pos) == bounds.upperBound
            && input.unicodeScalars[pos].isNewline
         }
       }
 
     case .endOfSubject:
       builder.buildAssert { (input, pos, bounds) in
-        pos == input.endIndex
+        pos == bounds.upperBound
       }
 
     case .resetStartOfMatch:
@@ -124,9 +124,10 @@ fileprivate extension Compiler.ByteCodeGen {
 
     case .firstMatchingPositionInSubject:
       // TODO: We can probably build a nice model with API here
-      builder.buildAssert { (input, pos, bounds) in
-        pos == bounds.lowerBound
-      }
+      
+      // FIXME: This needs to be based on `searchBounds`,
+      // not the `subjectBounds` given as an argument here
+      builder.buildAssert { (input, pos, bounds) in false }
 
     case .textSegment:
       builder.buildAssert { (input, pos, _) in
@@ -141,9 +142,10 @@ fileprivate extension Compiler.ByteCodeGen {
       }
 
     case .startOfLine:
+      // FIXME: Anchor.startOfLine must always use this first branch
       if options.anchorsMatchNewlines {
         builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-          if pos == input.startIndex { return true }
+          if pos == bounds.lowerBound { return true }
           switch semanticLevel {
           case .graphemeCluster:
             return input[input.index(before: pos)].isNewline
@@ -153,14 +155,15 @@ fileprivate extension Compiler.ByteCodeGen {
         }
       } else {
         builder.buildAssert { (input, pos, bounds) in
-          pos == input.startIndex
+          pos == bounds.lowerBound
         }
       }
       
     case .endOfLine:
+      // FIXME: Anchor.endOfLine must always use this first branch
       if options.anchorsMatchNewlines {
         builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-          if pos == input.endIndex { return true }
+          if pos == bounds.upperBound { return true }
           switch semanticLevel {
           case .graphemeCluster:
             return input[pos].isNewline
@@ -170,7 +173,7 @@ fileprivate extension Compiler.ByteCodeGen {
         }
       } else {
         builder.buildAssert { (input, pos, bounds) in
-          pos == input.endIndex
+          pos == bounds.upperBound
         }
       }
 
diff --git a/Sources/_StringProcessing/Engine/Consume.swift b/Sources/_StringProcessing/Engine/Consume.swift
index bc60ba260..540e1bdc5 100644
--- a/Sources/_StringProcessing/Engine/Consume.swift
+++ b/Sources/_StringProcessing/Engine/Consume.swift
@@ -18,10 +18,25 @@ extension Engine {
     Processor(
       program: program,
       input: input,
-      bounds: bounds,
+      subjectBounds: bounds,
+      searchBounds: bounds,
       matchMode: matchMode,
       isTracingEnabled: enableTracing)
   }
+  
+  func makeFirstMatchProcessor(
+    input: Input,
+    subjectBounds: Range<Input.Index>,
+    searchBounds: Range<Input.Index>
+  ) -> Processor<Input> {
+    Processor(
+      program: program,
+      input: input,
+      subjectBounds: subjectBounds,
+      searchBounds: searchBounds,
+      matchMode: .partialFromFront,
+      isTracingEnabled: enableTracing)
+  }
 }
 
 extension Processor where Input == String {
diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
index da8bfea14..573a4db09 100644
--- a/Sources/_StringProcessing/Engine/Processor.swift
+++ b/Sources/_StringProcessing/Engine/Processor.swift
@@ -31,11 +31,33 @@ struct Processor<
 > where Input.Element: Equatable { // maybe Hashable?
   typealias Element = Input.Element
 
+  /// The base collection of the subject to search.
+  ///
+  /// Taken together, `input` and `subjectBounds` define the actual subject
+  /// of the search. `input` can be a "supersequence" of the subject, while
+  /// `input[subjectBounds]` is the logical entity that is being searched.
   let input: Input
-  let bounds: Range<Position>
-  let matchMode: MatchMode
+  
+  /// The bounds of the logical subject in `input`.
+  ///
+  ///
+  /// `subjectBounds` is equal to or a subrange of
+  /// `input.startIndex..<input.endIndex`.
+  let subjectBounds: Range<Position>
+  
+  /// The bounds within the subject for an individual search.
+  ///
+  /// `searchBounds` is equal to `subjectBounds` in most cases, but can be a
+  /// subrange when performing operations like `str.replacing(_:with:subrange:)`.
+  let searchBounds: Range<Position>
+  
+  /// The current search position while processing.
+  ///
+  /// `currentPosition` must always be in the range `subjectBounds` or equal
+  /// to `subjectBounds.upperBound`.
   var currentPosition: Position
-
+  
+  let matchMode: MatchMode
   let instructions: InstructionList<Instruction>
   var controller: Controller
 
@@ -61,27 +83,30 @@ struct Processor<
 extension Processor {
   typealias Position = Input.Index
 
-  var start: Position { bounds.lowerBound }
-  var end: Position { bounds.upperBound }
+  var start: Position { subjectBounds.lowerBound }
+  var end: Position { subjectBounds.upperBound }
 }
 
 extension Processor {
   init(
     program: MEProgram<Input>,
     input: Input,
-    bounds: Range<Position>,
+    subjectBounds: Range<Position>,
+    searchBounds: Range<Position>,
     matchMode: MatchMode,
     isTracingEnabled: Bool
   ) {
     self.controller = Controller(pc: 0)
     self.instructions = program.instructions
     self.input = input
-    self.bounds = bounds
+    self.subjectBounds = subjectBounds
+    self.searchBounds = searchBounds
     self.matchMode = matchMode
     self.isTracingEnabled = isTracingEnabled
-    self.currentPosition = bounds.lowerBound
+    self.currentPosition = searchBounds.lowerBound
 
-    self.registers = Registers(program, bounds.upperBound)
+    // Initialize registers with end of search bounds
+    self.registers = Registers(program, searchBounds.upperBound)
     self.storedCaptures = Array(
        repeating: .init(), count: program.registerInfo.captures)
 
@@ -100,7 +125,7 @@ extension Processor {
   var slice: Input.SubSequence {
     // TODO: Should we whole-scale switch to slices, or
     // does that depend on options for some anchors?
-    input[bounds]
+    input[subjectBounds]
   }
 
   // Advance in our input
@@ -123,8 +148,8 @@ extension Processor {
   /// - Precondition: `bounds.contains(index) || index == bounds.upperBound`
   /// - Precondition: `index >= currentPosition`
   mutating func resume(at index: Input.Index) {
-    assert(index >= bounds.lowerBound)
-    assert(index <= bounds.upperBound)
+    assert(index >= subjectBounds.lowerBound)
+    assert(index <= subjectBounds.upperBound)
     assert(index >= currentPosition)
     currentPosition = index
   }
@@ -199,7 +224,7 @@ extension Processor {
     switch (currentPosition, matchMode) {
     // When reaching the end of the match bounds or when we are only doing a
     // prefix match, transition to accept.
-    case (bounds.upperBound, _), (_, .partialFromFront):
+    case (subjectBounds.upperBound, _), (_, .partialFromFront):
       state = .accept
 
     // When we are doing a full match but did not reach the end of the match
@@ -371,9 +396,9 @@ extension Processor {
 
     case .consumeBy:
       let reg = payload.consumer
-      guard currentPosition < bounds.upperBound,
+      guard currentPosition < subjectBounds.upperBound,
             let nextIndex = registers[reg](
-              input, currentPosition..<bounds.upperBound)
+              input, currentPosition..<subjectBounds.upperBound)
       else {
         signalFailure()
         return
@@ -385,7 +410,7 @@ extension Processor {
       let reg = payload.assertion
       let assertion = registers[reg]
       do {
-        guard try assertion(input, currentPosition, bounds) else {
+        guard try assertion(input, currentPosition, subjectBounds) else {
           signalFailure()
           return
         }
@@ -400,7 +425,7 @@ extension Processor {
       let matcher = registers[matcherReg]
       do {
         guard let (nextIdx, val) = try matcher(
-          input, currentPosition, bounds
+          input, currentPosition, subjectBounds
         ) else {
           signalFailure()
           return
diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift
index 941d5943a..4172546b3 100644
--- a/Sources/_StringProcessing/Executor.swift
+++ b/Sources/_StringProcessing/Executor.swift
@@ -22,11 +22,11 @@ struct Executor {
   @available(SwiftStdlib 5.7, *)
   func match<Output>(
     _ input: String,
-    in inputRange: Range<String.Index>,
+    in subjectBounds: Range<String.Index>,
     _ mode: MatchMode
   ) throws -> Regex<Output>.Match? {
     var cpu = engine.makeProcessor(
-      input: input, bounds: inputRange, matchMode: mode)
+      input: input, bounds: subjectBounds, matchMode: mode)
 
     guard let endIdx = cpu.consume() else {
       if let e = cpu.failureReason {
@@ -39,7 +39,7 @@ struct Executor {
       values: cpu.storedCaptures,
       referencedCaptureOffsets: engine.program.referencedCaptureOffsets)
 
-    let range = inputRange.lowerBound..<endIdx
+    let range = subjectBounds.lowerBound..<endIdx
     let caps = engine.program.captureList.createElements(capList)
 
     let anyRegexOutput = AnyRegexOutput(input: input, elements: caps)
@@ -49,9 +49,9 @@ struct Executor {
   @available(SwiftStdlib 5.7, *)
   func dynamicMatch(
     _ input: String,
-    in inputRange: Range<String.Index>,
+    in subjectBounds: Range<String.Index>,
     _ mode: MatchMode
   ) throws -> Regex<AnyRegexOutput>.Match? {
-    try match(input, in: inputRange, mode)
+    try match(input, in: subjectBounds, mode)
   }
 }
diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift
index 831c758dd..d423059ce 100644
--- a/Sources/_StringProcessing/Regex/Match.swift
+++ b/Sources/_StringProcessing/Regex/Match.swift
@@ -126,32 +126,59 @@ extension Regex {
 
   func _match(
     _ input: String,
-    in inputRange: Range<String.Index>,
+    in subjectBounds: Range<String.Index>,
     mode: MatchMode = .wholeString
   ) throws -> Regex<Output>.Match? {
     let executor = Executor(program: regex.program.loweredProgram)
-    return try executor.match(input, in: inputRange, mode)
+    return try executor.match(input, in: subjectBounds, mode)
   }
 
   func _firstMatch(
     _ input: String,
-    in inputRange: Range<String.Index>
+    in subjectBounds: Range<String.Index>
   ) throws -> Regex<Output>.Match? {
-    // FIXME: Something more efficient, likely an engine interface, and we
-    // should scrap the RegexConsumer crap and call this
+    try _firstMatch(input, subjectBounds: subjectBounds, searchBounds: subjectBounds)
+  }
+
+  func _firstMatch(
+    _ input: String,
+    subjectBounds: Range<String.Index>,
+    searchBounds: Range<String.Index>
+  ) throws -> Regex<Output>.Match? {
+    let executor = Executor(program: regex.program.loweredProgram)
 
-    var low = inputRange.lowerBound
-    let high = inputRange.upperBound
+    var low = searchBounds.lowerBound
+    let high = searchBounds.upperBound
     while true {
-      if let m = try _match(input, in: low..<high, mode: .partialFromFront) {
-        return m
-      }
-      if low >= high { return nil }
-      if regex.initialOptions.semanticLevel == .graphemeCluster {
-        input.formIndex(after: &low)
-      } else {
-        input.unicodeScalars.formIndex(after: &low)
+      // FIXME: Make once and reset at this point (or after search)
+      var cpu = executor.engine.makeFirstMatchProcessor(
+        input: input, subjectBounds: subjectBounds, searchBounds: searchBounds)
+      cpu.currentPosition = low
+
+      guard let endIdx = cpu.consume() else {
+        if let e = cpu.failureReason {
+          throw e
+        }
+        
+        if regex.initialOptions.semanticLevel == .graphemeCluster {
+          input.formIndex(after: &low)
+        } else {
+          input.unicodeScalars.formIndex(after: &low)
+        }
+        if low >= high { return nil }
+        
+        continue
       }
+
+      let capList = MECaptureList(
+        values: cpu.storedCaptures,
+        referencedCaptureOffsets: executor.engine.program.referencedCaptureOffsets)
+
+      let range = searchBounds.lowerBound..<endIdx
+      let caps = executor.engine.program.captureList.createElements(capList)
+
+      let anyRegexOutput = AnyRegexOutput(input: input, elements: caps)
+      return .init(anyRegexOutput: anyRegexOutput, range: range)
     }
   }
 }
diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift
index 352c267a5..db2088782 100644
--- a/Sources/_StringProcessing/_CharacterClassModel.swift
+++ b/Sources/_StringProcessing/_CharacterClassModel.swift
@@ -577,12 +577,12 @@ extension _CharacterClassModel {
   ) -> Bool {
     // FIXME: How should we handle bounds?
     // We probably need two concepts
-    if input.isEmpty { return false }
-    if pos == input.startIndex {
+    if bounds.isEmpty { return false }
+    if pos == bounds.lowerBound {
       return self.matches(in: input, at: pos, with: options) != nil
     }
     let priorIdx = input.index(before: pos)
-    if pos == input.endIndex {
+    if pos == bounds.upperBound {
       return self.matches(in: input, at: priorIdx, with: options) != nil
     }
 

From 8b585aff6fd77dcfc37ca7c8a66298c9ea6c546e Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Fri, 17 Jun 2022 15:59:49 -0500
Subject: [PATCH 02/18] Stop testing through the Executor

We should be testing the higher-level Regex.firstMatch function
instead of the lower-level types directly.
---
 Tests/RegexTests/MatchTests.swift | 50 +++++++------------------------
 1 file changed, 10 insertions(+), 40 deletions(-)

diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index 35f8a9548..c5a9e19d2 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -20,44 +20,17 @@ struct MatchError: Error {
     }
 }
 
-extension Executor {
-  func _firstMatch(
-    _ regex: String, input: String,
-    syntax: SyntaxOptions = .traditional,
-    enableTracing: Bool = false
-  ) throws -> (match: Substring, captures: [Substring?]) {
-    // TODO: This should be a CollectionMatcher API to call...
-    // Consumer -> searcher algorithm
-    var start = input.startIndex
-    while true {
-      if let result = try! self.dynamicMatch(
-        input,
-        in: start..<input.endIndex,
-        .partialFromFront
-      ) {
-        let caps = result.anyRegexOutput.slices(from: input)
-        return (input[result.range], caps)
-      } else if start == input.endIndex {
-        throw MatchError("match not found for \(regex) in \(input)")
-      } else {
-        input.formIndex(after: &start)
-      }
-    }
-  }
-}
-
 func _firstMatch(
-  _ regex: String,
+  _ regexStr: String,
   input: String,
-  syntax: SyntaxOptions = .traditional,
-  enableTracing: Bool = false
+  syntax: SyntaxOptions = .traditional
 ) throws -> (String, [String?]) {
-  var executor = try _compileRegex(regex, syntax)
-  executor.engine.enableTracing = enableTracing
-  let (str, caps) = try executor._firstMatch(
-    regex, input: input, enableTracing: enableTracing)
-  let capStrs = caps.map { $0 == nil ? nil : String($0!) }
-  return (String(str), capStrs)
+  let regex = try Regex(regexStr)
+  guard let result = try regex.firstMatch(in: input) else {
+    throw MatchError("match not found for \(regexStr) in \(input)")
+  }
+  let caps = result.output.slices(from: input)
+  return (String(input[result.range]), caps.map { $0.map(String.init) })
 }
 
 // TODO: multiple-capture variant
@@ -66,7 +39,6 @@ func flatCaptureTest(
   _ regex: String,
   _ tests: (input: String, expect: [String?]?)...,
   syntax: SyntaxOptions = .traditional,
-  enableTracing: Bool = false,
   dumpAST: Bool = false,
   xfail: Bool = false,
   file: StaticString = #file,
@@ -77,8 +49,7 @@ func flatCaptureTest(
       guard var (_, caps) = try? _firstMatch(
         regex,
         input: test,
-        syntax: syntax,
-        enableTracing: enableTracing
+        syntax: syntax
       ) else {
         if expect == nil {
           continue
@@ -162,8 +133,7 @@ func firstMatchTest(
     let (found, _) = try _firstMatch(
       regex,
       input: input,
-      syntax: syntax,
-      enableTracing: enableTracing)
+      syntax: syntax)
 
     if xfail {
       XCTAssertNotEqual(found, match, file: file, line: line)

From a6ff9a5ac49b455a50cd671cb2a8b9f4ee6e8376 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Fri, 17 Jun 2022 16:00:23 -0500
Subject: [PATCH 03/18] Fix firstMatch implementation

---
 Sources/_StringProcessing/Regex/Match.swift | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift
index d423059ce..8a9a83b10 100644
--- a/Sources/_StringProcessing/Regex/Match.swift
+++ b/Sources/_StringProcessing/Regex/Match.swift
@@ -160,12 +160,12 @@ extension Regex {
           throw e
         }
         
+        if low >= high { return nil }
         if regex.initialOptions.semanticLevel == .graphemeCluster {
           input.formIndex(after: &low)
         } else {
           input.unicodeScalars.formIndex(after: &low)
         }
-        if low >= high { return nil }
         
         continue
       }
@@ -174,7 +174,7 @@ extension Regex {
         values: cpu.storedCaptures,
         referencedCaptureOffsets: executor.engine.program.referencedCaptureOffsets)
 
-      let range = searchBounds.lowerBound..<endIdx
+      let range = low..<endIdx
       let caps = executor.engine.program.captureList.createElements(capList)
 
       let anyRegexOutput = AnyRegexOutput(input: input, elements: caps)

From a87bc1b854c45bec6556a56f9971a7e132457a44 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Fri, 17 Jun 2022 16:00:38 -0500
Subject: [PATCH 04/18] Stop using RegexConsumer in contains(_:)

---
 Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift
index e481597f8..9d8e7349d 100644
--- a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift
+++ b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift
@@ -74,6 +74,6 @@ extension BidirectionalCollection where SubSequence == Substring {
   @_disfavoredOverload
   @available(SwiftStdlib 5.7, *)
   public func contains(_ regex: some RegexComponent) -> Bool {
-    _contains(RegexConsumer(regex))
+    (try? regex.regex.firstMatch(in: self[...])) != nil
   }
 }

From d1290f82d82bc789d37bd7e2eebd696eb2c8e0ed Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Fri, 17 Jun 2022 16:07:48 -0500
Subject: [PATCH 05/18] Update test xfailures

str.replacing(_:with:subrange:) still needs to use separate bounds
for the subject and the search range.
---
 Tests/RegexTests/MatchTests.swift | 46 ++++++++++++++-----------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index c5a9e19d2..5fbdd949b 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -700,11 +700,11 @@ extension RegexTests {
     firstMatchTest(#"[\Qa-c\E]+"#, input: "a-c", match: "a-c")
 
     firstMatchTest(#"["a-c"]+"#, input: "abc", match: "a",
-                   syntax: .experimental)
+                   syntax: .experimental, xfail: true)
     firstMatchTest(#"["abc"]+"#, input: "cba", match: "cba",
                    syntax: .experimental)
     firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: "abc",
-                   syntax: .experimental)
+                   syntax: .experimental, xfail: true)
     firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: #""abc""#)
   }
 
@@ -1460,35 +1460,31 @@ extension RegexTests {
     let postfixLetters = try Regex(#"[a-z]+$"#, as: Substring.self)
 
     // start anchor (^) should match beginning of substring
-    XCTExpectFailure {
-      XCTAssertEqual(trimmed.firstMatch(of: prefixLetters)?.output, "abc")
-    }
-    XCTExpectFailure {
-      XCTAssertEqual(trimmed.replacing(prefixLetters, with: ""), "456def")
-    }
+    XCTAssertEqual(trimmed.firstMatch(of: prefixLetters)?.output, "abc")
+    XCTAssertEqual(trimmed.replacing(prefixLetters, with: ""), "456def")
     
     // end anchor ($) should match end of substring
+    XCTAssertEqual(trimmed.firstMatch(of: postfixLetters)?.output, "def")
+    XCTAssertEqual(trimmed.replacing(postfixLetters, with: ""), "abc456")
+
+    // start anchor (^) should _not_ match beginning of subrange
     XCTExpectFailure {
-      XCTAssertEqual(trimmed.firstMatch(of: postfixLetters)?.output, "def")
+      XCTAssertEqual(
+        string.replacing(
+          prefixLetters,
+          with: "",
+          subrange: trimmed.startIndex..<trimmed.endIndex),
+        string)
     }
+    // end anchor ($) should _not_ match beginning of subrange
     XCTExpectFailure {
-      XCTAssertEqual(trimmed.replacing(postfixLetters, with: ""), "abc456")
+      XCTAssertEqual(
+        string.replacing(
+          postfixLetters,
+          with: "",
+          subrange: trimmed.startIndex..<trimmed.endIndex),
+        string)
     }
-
-    // start anchor (^) should _not_ match beginning of subrange
-    XCTAssertEqual(
-      string.replacing(
-        prefixLetters,
-        with: "",
-        subrange: trimmed.startIndex..<trimmed.endIndex),
-      string)
-    // end anchor ($) should _not_ match beginning of subrange
-    XCTAssertEqual(
-      string.replacing(
-        postfixLetters,
-        with: "",
-        subrange: trimmed.startIndex..<trimmed.endIndex),
-      string)
   }
   
   func testMatchingOptionsScope() {

From 80ccef67aa90052bed33dbf8164e161b4b59e8d8 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Mon, 20 Jun 2022 14:01:38 -0500
Subject: [PATCH 06/18] firstMatch shouldn't update searchBounds on iteration

When we move forward while searching for the first match, the search
bounds should stay the same. Only the currentPosition needs to move
forward. This will allow us to implement the \G start of match anchor,
with which /\Gab/ matches "abab" twice, compared with /^ab/, which
only matches once.
---
 .../_StringProcessing/Engine/Processor.swift  | 19 +++++++++----------
 Sources/_StringProcessing/Executor.swift      |  2 +-
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
index dee77c30b..2fcd7de27 100644
--- a/Sources/_StringProcessing/Engine/Processor.swift
+++ b/Sources/_StringProcessing/Engine/Processor.swift
@@ -45,17 +45,17 @@ struct Processor<
   /// `input.startIndex..<input.endIndex`.
   let subjectBounds: Range<Position>
   
-  let matchMode: MatchMode
-  let instructions: InstructionList<Instruction>
-
-  // MARK: Resettable state
-
   /// The bounds within the subject for an individual search.
   ///
   /// `searchBounds` is equal to `subjectBounds` in some cases, but can be a
   /// subrange when performing operations like searching for matches iteratively
   /// or calling `str.replacing(_:with:subrange:)`.
-  var searchBounds: Range<Position>
+  let searchBounds: Range<Position>
+
+  let matchMode: MatchMode
+  let instructions: InstructionList<Instruction>
+
+  // MARK: Resettable state
   
   /// The current search position while processing.
   ///
@@ -115,13 +115,12 @@ extension Processor {
     _checkInvariants()
   }
 
-  mutating func reset(searchBounds: Range<Position>) {
-    self.searchBounds = searchBounds
-    self.currentPosition = self.searchBounds.lowerBound
+  mutating func reset(start: Position, end: Position) {
+    self.currentPosition = start
 
     self.controller = Controller(pc: 0)
 
-    self.registers.reset(sentinel: searchBounds.upperBound)
+    self.registers.reset(sentinel: end)
 
     self.savePoints.removeAll(keepingCapacity: true)
     self.callStack.removeAll(keepingCapacity: true)
diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift
index 432fef167..13ad5733c 100644
--- a/Sources/_StringProcessing/Executor.swift
+++ b/Sources/_StringProcessing/Executor.swift
@@ -45,7 +45,7 @@ struct Executor {
       } else {
         input.unicodeScalars.formIndex(after: &low)
       }
-      cpu.reset(searchBounds: low..<high)
+      cpu.reset(start: low, end: high)
     }
   }
 

From 5f56e395a3a49f2d12e6627ff13ba23f7a3abf2b Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Mon, 20 Jun 2022 15:04:28 -0500
Subject: [PATCH 07/18] Make matches(of:) and ranges(of:) boundary-aware

With this change, RegexMatchesCollection keeps the subject bounds
and search bounds separately, modifying the search bounds with each
iteration. In addition, the replace methods that only operate on a
subrange can specify that specifically, getting the correct anchor
behavior while only matching within a portion of a string.
---
 .../Algorithms/Algorithms/Ranges.swift        | 34 ++++++++++++--
 .../Algorithms/Algorithms/Replace.swift       |  5 +-
 .../Algorithms/Matching/Matches.swift         | 39 ++++++++++++----
 Tests/RegexTests/MatchTests.swift             | 46 +++++++++++--------
 4 files changed, 93 insertions(+), 31 deletions(-)

diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift
index 40732255c..fc6b23af2 100644
--- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift
+++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift
@@ -229,9 +229,18 @@ extension BidirectionalCollection where Element: Comparable {
 @available(SwiftStdlib 5.7, *)
 struct RegexRangesCollection<Output> {
   let base: RegexMatchesCollection<Output>
-  
-  init(string: Substring, regex: Regex<Output>) {
-    self.base = RegexMatchesCollection(base: string, regex: regex)
+
+  init(
+    input: String,
+    subjectBounds: Range<String.Index>,
+    searchBounds: Range<String.Index>,
+    regex: Regex<Output>
+  ) {
+    self.base = .init(
+      input: input,
+      subjectBounds: subjectBounds,
+      searchBounds: searchBounds,
+      regex: regex)
   }
 }
 
@@ -263,12 +272,29 @@ extension RegexRangesCollection: Collection {
 // MARK: Regex algorithms
 
 extension Collection where SubSequence == Substring {
+  @available(SwiftStdlib 5.7, *)
+  @_disfavoredOverload
+  func _ranges<R: RegexComponent>(
+    of regex: R,
+    subjectBounds: Range<String.Index>,
+    searchBounds: Range<String.Index>
+  ) -> RegexRangesCollection<R.RegexOutput> {
+    RegexRangesCollection(
+      input: self[...].base,
+      subjectBounds: subjectBounds,
+      searchBounds: searchBounds,
+      regex: regex.regex)
+  }
+  
   @available(SwiftStdlib 5.7, *)
   @_disfavoredOverload
   func _ranges<R: RegexComponent>(
     of regex: R
   ) -> RegexRangesCollection<R.RegexOutput> {
-    RegexRangesCollection(string: self[...], regex: regex.regex)
+    _ranges(
+      of: regex,
+      subjectBounds: startIndex..<endIndex,
+      searchBounds: startIndex..<endIndex)
   }
 }
 
diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift
index a3f876b0e..5e6c97973 100644
--- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift
+++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift
@@ -179,7 +179,10 @@ extension RangeReplaceableCollection where SubSequence == Substring {
     maxReplacements: Int = .max
   ) -> Self where Replacement.Element == Element {
     _replacing(
-      self[subrange]._ranges(of: regex),
+      self._ranges(
+        of: regex,
+        subjectBounds: startIndex..<endIndex,
+        searchBounds: subrange),
       with: replacement,
       maxReplacements: maxReplacements)
   }
diff --git a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift
index 094d3dfdd..04d34095b 100644
--- a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift
+++ b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift
@@ -185,14 +185,26 @@ extension BidirectionalCollection {
 
 @available(SwiftStdlib 5.7, *)
 struct RegexMatchesCollection<Output> {
-  let input: Substring
+  let input: String
+  let subjectBounds: Range<String.Index>
+  let searchBounds: Range<String.Index>
   let regex: Regex<Output>
   let startIndex: Index
   
-  init(base: Substring, regex: Regex<Output>) {
-    self.input = base
+  init(
+    input: String,
+    subjectBounds: Range<String.Index>,
+    searchBounds: Range<String.Index>,
+    regex: Regex<Output>
+  ) {
+    self.input = input
+    self.subjectBounds = subjectBounds
+    self.searchBounds = searchBounds
     self.regex = regex
-    self.startIndex = base.firstMatch(of: regex).map(Index.match) ?? .end
+    self.startIndex = (try? regex._firstMatch(
+      input,
+      subjectBounds: subjectBounds,
+      searchBounds: searchBounds)).map(Index.match) ?? .end
   }
 }
 
@@ -241,12 +253,15 @@ extension RegexMatchesCollection: Sequence {
       }
       
       // `nextStart` is `nil` when iteration has completed
-      guard let start = nextStart else {
+      guard let start = nextStart, start <= base.searchBounds.upperBound else {
         return nil
       }
       
       // Otherwise, find the next match (if any) and compute `nextStart`
-      let match = try? base.regex.firstMatch(in: base.input[start...])
+      let match = try? base.regex._firstMatch(
+        base.input,
+        subjectBounds: base.subjectBounds,
+        searchBounds: start..<base.searchBounds.upperBound)
       nextStart = match.flatMap(base.searchIndex(after:))
       return match
     }
@@ -310,7 +325,11 @@ extension RegexMatchesCollection: Collection {
     
     guard
       let start = searchIndex(after: currentMatch),
-      let nextMatch = try? regex.firstMatch(in: input[start...])
+      start <= searchBounds.upperBound,
+      let nextMatch = try? regex._firstMatch(
+        input,
+        subjectBounds: subjectBounds,
+        searchBounds: start..<searchBounds.upperBound)
     else {
       return .end
     }
@@ -331,7 +350,11 @@ extension BidirectionalCollection where SubSequence == Substring {
   func _matches<R: RegexComponent>(
     of regex: R
   ) -> RegexMatchesCollection<R.RegexOutput> {
-    RegexMatchesCollection(base: self[...], regex: regex.regex)
+    RegexMatchesCollection(
+      input: self[...].base,
+      subjectBounds: startIndex..<endIndex,
+      searchBounds: startIndex..<endIndex,
+      regex: regex.regex)
   }
 
   @available(SwiftStdlib 5.7, *)
diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index 5fbdd949b..67c1346b4 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -1467,24 +1467,34 @@ extension RegexTests {
     XCTAssertEqual(trimmed.firstMatch(of: postfixLetters)?.output, "def")
     XCTAssertEqual(trimmed.replacing(postfixLetters, with: ""), "abc456")
 
-    // start anchor (^) should _not_ match beginning of subrange
-    XCTExpectFailure {
-      XCTAssertEqual(
-        string.replacing(
-          prefixLetters,
-          with: "",
-          subrange: trimmed.startIndex..<trimmed.endIndex),
-        string)
-    }
-    // end anchor ($) should _not_ match beginning of subrange
-    XCTExpectFailure {
-      XCTAssertEqual(
-        string.replacing(
-          postfixLetters,
-          with: "",
-          subrange: trimmed.startIndex..<trimmed.endIndex),
-        string)
-    }
+    // start anchor (^) should _not_ match beginning of replaced subrange
+    XCTAssertEqual(
+      string.replacing(
+        prefixLetters,
+        with: "",
+        subrange: trimmed.startIndex..<trimmed.endIndex),
+      string)
+    // end anchor ($) should _not_ match end of replaced subrange
+    XCTAssertEqual(
+      string.replacing(
+        postfixLetters,
+        with: "",
+        subrange: trimmed.startIndex..<trimmed.endIndex),
+      string)
+    
+    // if subrange == actual subject bounds, anchors _do_ match
+    XCTAssertEqual(
+      trimmed.replacing(
+        prefixLetters,
+        with: "",
+        subrange: trimmed.startIndex..<trimmed.endIndex),
+      "456def")
+    XCTAssertEqual(
+      trimmed.replacing(
+        postfixLetters,
+        with: "",
+        subrange: trimmed.startIndex..<trimmed.endIndex),
+      "abc456")
   }
   
   func testMatchingOptionsScope() {

From 46173f20bae2d2bb6600c13ac508b2cc8194c6c8 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Tue, 21 Jun 2022 11:03:42 -0500
Subject: [PATCH 08/18] Improve subject/searchBounds documentation

---
 Sources/_StringProcessing/Engine/Processor.swift | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
index 2fcd7de27..2994d37e6 100644
--- a/Sources/_StringProcessing/Engine/Processor.swift
+++ b/Sources/_StringProcessing/Engine/Processor.swift
@@ -40,8 +40,12 @@ struct Processor<
   
   /// The bounds of the logical subject in `input`.
   ///
+  /// `subjectBounds` represents the bounds of the string or substring that a
+  /// regex operation is invoked upon. Anchors like `^` and `.startOfSubject`
+  /// always use `subjectBounds` as their reference points, instead of
+  /// `input`'s boundaries or `searchBounds`.
   ///
-  /// `subjectBounds` is equal to or a subrange of
+  /// `subjectBounds` is always equal to or a subrange of
   /// `input.startIndex..<input.endIndex`.
   let subjectBounds: Range<Position>
   
@@ -50,6 +54,10 @@ struct Processor<
   /// `searchBounds` is equal to `subjectBounds` in some cases, but can be a
   /// subrange when performing operations like searching for matches iteratively
   /// or calling `str.replacing(_:with:subrange:)`.
+  ///
+  /// Anchors like `^` and `.startOfSubject` use `subjectBounds` instead of
+  /// `searchBounds`. The "start of matching" anchor `\G` uses `searchBounds`
+  /// as its starting point.
   let searchBounds: Range<Position>
 
   let matchMode: MatchMode

From 6dcb9fd621dc592af5430b03ab50e08ea39f0dd2 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Tue, 21 Jun 2022 11:04:17 -0500
Subject: [PATCH 09/18] Processor.reset only needs the starting position

---
 Sources/_StringProcessing/Engine/Processor.swift | 6 +++---
 Sources/_StringProcessing/Executor.swift         | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
index 2994d37e6..afa75ddf9 100644
--- a/Sources/_StringProcessing/Engine/Processor.swift
+++ b/Sources/_StringProcessing/Engine/Processor.swift
@@ -123,12 +123,12 @@ extension Processor {
     _checkInvariants()
   }
 
-  mutating func reset(start: Position, end: Position) {
-    self.currentPosition = start
+  mutating func reset(currentPosition: Position) {
+    self.currentPosition = currentPosition
 
     self.controller = Controller(pc: 0)
 
-    self.registers.reset(sentinel: end)
+    self.registers.reset(sentinel: searchBounds.upperBound)
 
     self.savePoints.removeAll(keepingCapacity: true)
     self.callStack.removeAll(keepingCapacity: true)
diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift
index 13ad5733c..e4399d2be 100644
--- a/Sources/_StringProcessing/Executor.swift
+++ b/Sources/_StringProcessing/Executor.swift
@@ -45,7 +45,7 @@ struct Executor {
       } else {
         input.unicodeScalars.formIndex(after: &low)
       }
-      cpu.reset(start: low, end: high)
+      cpu.reset(currentPosition: low)
     }
   }
 

From 45c5bd2e3fe7002db9c36189db18dd3993ff7862 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Tue, 21 Jun 2022 11:06:07 -0500
Subject: [PATCH 10/18] Change Executor._match to use starting position

This still isn't quite right - Processor.consume() should really
return the matched range, since it already tracks the starting point
of matching. Having it return the whole range will also prepare us
for supporting `\K`, which will mean `currentPosition` isn't always
the start of the matched range.
---
 Sources/_StringProcessing/Executor.swift | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift
index e4399d2be..268854f92 100644
--- a/Sources/_StringProcessing/Executor.swift
+++ b/Sources/_StringProcessing/Executor.swift
@@ -35,7 +35,7 @@ struct Executor {
     let high = searchBounds.upperBound
     while true {
       if let m: Regex<Output>.Match = try _match(
-        input, in: low..<high, using: &cpu
+        input, from: low, using: &cpu
       ) {
         return m
       }
@@ -57,15 +57,17 @@ struct Executor {
   ) throws -> Regex<Output>.Match? {
     var cpu = engine.makeProcessor(
       input: input, bounds: subjectBounds, matchMode: mode)
-    return try _match(input, in: subjectBounds, using: &cpu)
+    return try _match(input, from: subjectBounds.lowerBound, using: &cpu)
   }
 
   @available(SwiftStdlib 5.7, *)
   func _match<Output>(
     _ input: String,
-    in subjectBounds: Range<String.Index>,
+    from currentPosition: String.Index,
     using cpu: inout Processor<String>
   ) throws -> Regex<Output>.Match? {
+    // FIXME: currentPosition is already encapsulated in cpu, don't pass in
+    // FIXME: cpu.consume() should return the matched range, not the upper bound
     guard let endIdx = cpu.consume() else {
       if let e = cpu.failureReason {
         throw e
@@ -77,7 +79,7 @@ struct Executor {
       values: cpu.storedCaptures,
       referencedCaptureOffsets: engine.program.referencedCaptureOffsets)
 
-    let range = subjectBounds.lowerBound..<endIdx
+    let range = currentPosition..<endIdx
     let caps = engine.program.captureList.createElements(capList)
 
     let anyRegexOutput = AnyRegexOutput(input: input, elements: caps)

From e80fe54d2d55fdc66b57e982de236ccee0a4731f Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Tue, 21 Jun 2022 11:06:22 -0500
Subject: [PATCH 11/18] Audit search/subjectBounds usage

---
 Sources/_StringProcessing/Engine/Processor.swift | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
index afa75ddf9..16f8d5085 100644
--- a/Sources/_StringProcessing/Engine/Processor.swift
+++ b/Sources/_StringProcessing/Engine/Processor.swift
@@ -184,8 +184,8 @@ extension Processor {
   /// - Precondition: `bounds.contains(index) || index == bounds.upperBound`
   /// - Precondition: `index >= currentPosition`
   mutating func resume(at index: Input.Index) {
-    assert(index >= subjectBounds.lowerBound)
-    assert(index <= subjectBounds.upperBound)
+    assert(index >= searchBounds.lowerBound)
+    assert(index <= searchBounds.upperBound)
     assert(index >= currentPosition)
     currentPosition = index
   }
@@ -256,7 +256,7 @@ extension Processor {
     switch (currentPosition, matchMode) {
     // When reaching the end of the match bounds or when we are only doing a
     // prefix match, transition to accept.
-    case (subjectBounds.upperBound, _), (_, .partialFromFront):
+    case (searchBounds.upperBound, _), (_, .partialFromFront):
       state = .accept
 
     // When we are doing a full match but did not reach the end of the match
@@ -434,9 +434,9 @@ extension Processor {
 
     case .consumeBy:
       let reg = payload.consumer
-      guard currentPosition < subjectBounds.upperBound,
+      guard currentPosition < searchBounds.upperBound,
             let nextIndex = registers[reg](
-              input, currentPosition..<subjectBounds.upperBound)
+              input, currentPosition..<searchBounds.upperBound)
       else {
         signalFailure()
         return

From 7c125bc3dd7fb0b6df26ca6f6031ac666821fc60 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Tue, 21 Jun 2022 11:13:57 -0500
Subject: [PATCH 12/18] Further subject/searchBounds auditing

---
 Sources/_StringProcessing/Engine/Processor.swift | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
index 16f8d5085..618f9a47e 100644
--- a/Sources/_StringProcessing/Engine/Processor.swift
+++ b/Sources/_StringProcessing/Engine/Processor.swift
@@ -93,8 +93,8 @@ struct Processor<
 extension Processor {
   typealias Position = Input.Index
 
-  var start: Position { subjectBounds.lowerBound }
-  var end: Position { subjectBounds.upperBound }
+  var start: Position { searchBounds.lowerBound }
+  var end: Position { searchBounds.upperBound }
 }
 
 extension Processor {
@@ -144,10 +144,12 @@ extension Processor {
   }
 
   func _checkInvariants() {
-    assert(end <= input.endIndex)
-    assert(start >= input.startIndex)
-    assert(currentPosition >= start)
-    assert(currentPosition <= end)
+    assert(searchBounds.lowerBound >= subjectBounds.lowerBound)
+    assert(searchBounds.upperBound <= subjectBounds.upperBound)
+    assert(subjectBounds.lowerBound >= input.startIndex)
+    assert(subjectBounds.upperBound <= input.endIndex)
+    assert(currentPosition >= searchBounds.lowerBound)
+    assert(currentPosition <= searchBounds.upperBound)
   }
 }
 

From 7bf5b147b6be5f7396e7933b8ad5d28e3a54b969 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Tue, 21 Jun 2022 12:30:34 -0500
Subject: [PATCH 13/18] Update buildAssert use-sites to use subjectBounds

This at least clarifies which boundaries are being used.
---
 Sources/_StringProcessing/ByteCodeGen.swift | 42 ++++++++++-----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
index 95d34f568..f012b96bd 100644
--- a/Sources/_StringProcessing/ByteCodeGen.swift
+++ b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -96,26 +96,26 @@ fileprivate extension Compiler.ByteCodeGen {
     // need to supply both a slice bounds and a per-search bounds.
     switch kind {
     case .startOfSubject:
-      builder.buildAssert { (input, pos, bounds) in
-        pos == bounds.lowerBound
+      builder.buildAssert { (input, pos, subjectBounds) in
+        pos == subjectBounds.lowerBound
       }
 
     case .endOfSubjectBeforeNewline:
-      builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-        if pos == bounds.upperBound { return true }
+      builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, subjectBounds) in
+        if pos == subjectBounds.upperBound { return true }
         switch semanticLevel {
         case .graphemeCluster:
-          return input.index(after: pos) == bounds.upperBound
+          return input.index(after: pos) == subjectBounds.upperBound
            && input[pos].isNewline
         case .unicodeScalar:
-          return input.unicodeScalars.index(after: pos) == bounds.upperBound
+          return input.unicodeScalars.index(after: pos) == subjectBounds.upperBound
            && input.unicodeScalars[pos].isNewline
         }
       }
 
     case .endOfSubject:
-      builder.buildAssert { (input, pos, bounds) in
-        pos == bounds.upperBound
+      builder.buildAssert { (input, pos, subjectBounds) in
+        pos == subjectBounds.upperBound
       }
 
     case .resetStartOfMatch:
@@ -127,7 +127,7 @@ fileprivate extension Compiler.ByteCodeGen {
       
       // FIXME: This needs to be based on `searchBounds`,
       // not the `subjectBounds` given as an argument here
-      builder.buildAssert { (input, pos, bounds) in false }
+      builder.buildAssert { (input, pos, subjectBounds) in false }
 
     case .textSegment:
       builder.buildAssert { (input, pos, _) in
@@ -144,8 +144,8 @@ fileprivate extension Compiler.ByteCodeGen {
     case .startOfLine:
       // FIXME: Anchor.startOfLine must always use this first branch
       if options.anchorsMatchNewlines {
-        builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-          if pos == bounds.lowerBound { return true }
+        builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, subjectBounds) in
+          if pos == subjectBounds.lowerBound { return true }
           switch semanticLevel {
           case .graphemeCluster:
             return input[input.index(before: pos)].isNewline
@@ -154,16 +154,16 @@ fileprivate extension Compiler.ByteCodeGen {
           }
         }
       } else {
-        builder.buildAssert { (input, pos, bounds) in
-          pos == bounds.lowerBound
+        builder.buildAssert { (input, pos, subjectBounds) in
+          pos == subjectBounds.lowerBound
         }
       }
       
     case .endOfLine:
       // FIXME: Anchor.endOfLine must always use this first branch
       if options.anchorsMatchNewlines {
-        builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-          if pos == bounds.upperBound { return true }
+        builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, subjectBounds) in
+          if pos == subjectBounds.upperBound { return true }
           switch semanticLevel {
           case .graphemeCluster:
             return input[pos].isNewline
@@ -172,25 +172,25 @@ fileprivate extension Compiler.ByteCodeGen {
           }
         }
       } else {
-        builder.buildAssert { (input, pos, bounds) in
-          pos == bounds.upperBound
+        builder.buildAssert { (input, pos, subjectBounds) in
+          pos == subjectBounds.upperBound
         }
       }
 
     case .wordBoundary:
       // TODO: May want to consider Unicode level
-      builder.buildAssert { [options] (input, pos, bounds) in
+      builder.buildAssert { [options] (input, pos, subjectBounds) in
         // TODO: How should we handle bounds?
         _CharacterClassModel.word.isBoundary(
-          input, at: pos, bounds: bounds, with: options)
+          input, at: pos, bounds: subjectBounds, with: options)
       }
 
     case .notWordBoundary:
       // TODO: May want to consider Unicode level
-      builder.buildAssert { [options] (input, pos, bounds) in
+      builder.buildAssert { [options] (input, pos, subjectBounds) in
         // TODO: How should we handle bounds?
         !_CharacterClassModel.word.isBoundary(
-          input, at: pos, bounds: bounds, with: options)
+          input, at: pos, bounds: subjectBounds, with: options)
       }
     }
   }

From 6ce6108c9c7468644d87ea1ff0e04cf574da2cb6 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Tue, 21 Jun 2022 12:31:01 -0500
Subject: [PATCH 14/18] Additonal subject/searchBounds correctness.

---
 Sources/_StringProcessing/Engine/Processor.swift | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
index 618f9a47e..46e82e11e 100644
--- a/Sources/_StringProcessing/Engine/Processor.swift
+++ b/Sources/_StringProcessing/Engine/Processor.swift
@@ -157,7 +157,7 @@ extension Processor {
   var slice: Input.SubSequence {
     // TODO: Should we whole-scale switch to slices, or
     // does that depend on options for some anchors?
-    input[subjectBounds]
+    input[searchBounds]
   }
 
   // Advance in our input, without any checks or failure signalling
@@ -465,7 +465,7 @@ extension Processor {
       let matcher = registers[matcherReg]
       do {
         guard let (nextIdx, val) = try matcher(
-          input, currentPosition, subjectBounds
+          input, currentPosition, searchBounds
         ) else {
           signalFailure()
           return

From 73ba7c7371b3a3056b62fa4eb3dbf99711f05777 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Tue, 21 Jun 2022 22:33:24 -0500
Subject: [PATCH 15/18] Add internal API for testing alternative syntaxes

---
 Sources/_StringProcessing/Regex/AnyRegexOutput.swift | 4 ++++
 Tests/RegexTests/MatchTests.swift                    | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift
index 156dd7220..8f9d0e010 100644
--- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift
+++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift
@@ -145,6 +145,10 @@ extension Regex where Output == AnyRegexOutput {
   public init(_ pattern: String) throws {
     self.init(ast: try parse(pattern, .semantic, .traditional))
   }
+  
+  internal init(_ pattern: String, syntax: SyntaxOptions) throws {
+    self.init(ast: try parse(pattern, .semantic, syntax))
+  }
 }
 
 @available(SwiftStdlib 5.7, *)
diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index 67c1346b4..bf2cafb1d 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -25,7 +25,7 @@ func _firstMatch(
   input: String,
   syntax: SyntaxOptions = .traditional
 ) throws -> (String, [String?]) {
-  let regex = try Regex(regexStr)
+  let regex = try Regex(regexStr, syntax: syntax)
   guard let result = try regex.firstMatch(in: input) else {
     throw MatchError("match not found for \(regexStr) in \(input)")
   }
@@ -700,11 +700,11 @@ extension RegexTests {
     firstMatchTest(#"[\Qa-c\E]+"#, input: "a-c", match: "a-c")
 
     firstMatchTest(#"["a-c"]+"#, input: "abc", match: "a",
-                   syntax: .experimental, xfail: true)
+                   syntax: .experimental)
     firstMatchTest(#"["abc"]+"#, input: "cba", match: "cba",
                    syntax: .experimental)
     firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: "abc",
-                   syntax: .experimental, xfail: true)
+                   syntax: .experimental)
     firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: #""abc""#)
   }
 

From c1a6edf6015abf151d27de3e36b61fb707a32f89 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Wed, 22 Jun 2022 15:44:36 -0500
Subject: [PATCH 16/18] Add an XFAIL'd test for \G

---
 Tests/RegexTests/MatchTests.swift | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index bf2cafb1d..6bf7986ed 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -917,7 +917,7 @@ extension RegexTests {
       #"\d{3}(?<!USD\d{3})"#, input: "Price: JYP100", match: "100", xfail: true)
   }
 
-  func testMatchAnchors() {
+  func testMatchAnchors() throws {
     // MARK: Anchors
     firstMatchTests(
       #"^\d+"#,
@@ -985,7 +985,13 @@ extension RegexTests {
       ("123 456", "23"))
 
     // TODO: \G and \K
-
+    do {
+      let regex = try Regex(#"\Gab"#, as: Substring.self)
+      XCTExpectFailure {
+        XCTAssertEqual("abab".matches(of: regex).map(\.output), ["ab", "ab"])
+      }
+    }
+    
     // TODO: Oniguruma \y and \Y
     firstMatchTests(
       #"\u{65}"#,             // Scalar 'e' is present in both

From 8d8571fcb09cd6f712911ba65a93f97aeda99796 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Wed, 22 Jun 2022 15:45:17 -0500
Subject: [PATCH 17/18] Add XFAIL'd tests for Anchor.start/endOfLine

These anchors should always match at start/end of line, not just
when .anchorsMatchLineEndings() is turned on.
---
 Tests/RegexBuilderTests/RegexDSLTests.swift | 43 +++++++++++++++++++--
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift
index fc31e575f..24d6f219e 100644
--- a/Tests/RegexBuilderTests/RegexDSLTests.swift
+++ b/Tests/RegexBuilderTests/RegexDSLTests.swift
@@ -18,6 +18,7 @@ class RegexDSLTests: XCTestCase {
     _ tests: (input: String, expectedCaptures: MatchType?)...,
     matchType: MatchType.Type,
     _ equivalence: (MatchType, MatchType) -> Bool,
+    xfail: Bool = false,
     file: StaticString = #file,
     line: UInt = #line,
     @RegexComponentBuilder _ content: () -> Content
@@ -25,8 +26,13 @@ class RegexDSLTests: XCTestCase {
     let regex = content()
     for (input, maybeExpectedCaptures) in tests {
       let maybeMatch = input.wholeMatch(of: regex)
-      if let expectedCaptures = maybeExpectedCaptures {
-        let match = try XCTUnwrap(maybeMatch, file: file, line: line)
+      if let expectedCaptures = maybeExpectedCaptures,
+        let match = maybeMatch
+      {
+        if xfail {
+          XCTFail("Unexpectedly matched", file: file, line: line)
+          continue
+        }
         XCTAssertTrue(
           type(of: regex).RegexOutput.self == MatchType.self,
           """
@@ -39,7 +45,9 @@ class RegexDSLTests: XCTestCase {
           "'\(captures)' is not equal to the expected '\(expectedCaptures)'.",
           file: file, line: line)
       } else {
-        XCTAssertNil(maybeMatch, file: file, line: line)
+        if !xfail {
+          XCTAssertNil(maybeMatch, file: file, line: line)
+        }
       }
     }
   }
@@ -525,6 +533,35 @@ class RegexDSLTests: XCTestCase {
       NegativeLookahead { "2" }
       CharacterClass.word
     }
+    
+    try _testDSLCaptures(
+      ("aaa", "aaa"),
+      ("\naaa", nil),
+      ("aaa\n", nil),
+      ("\naaa\n", nil),
+      matchType: Substring.self, ==)
+    {
+      Regex {
+        Anchor.startOfSubject
+        Repeat("a", count: 3)
+        Anchor.endOfSubject
+      }.anchorsMatchLineEndings()
+    }
+    
+    // FIXME: Anchor.start/endOfLine needs to always match line endings,
+    // even when the `anchorsMatchLineEndings()` option is turned off.
+    try _testDSLCaptures(
+      ("\naaa", "aaa"),
+      ("aaa\n", "aaa"),
+      ("\naaa\n", "aaa"),
+      matchType: Substring.self, ==, xfail: true)
+    {
+      Regex {
+        Anchor.startOfLine
+        Repeat("a", count: 3)
+        Anchor.endOfLine
+      }
+    }
   }
 
   func testNestedGroups() throws {

From 94612b2c92d68ea7c0f3815c1c24e2ce9b1ff7cb Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Wed, 22 Jun 2022 15:48:43 -0500
Subject: [PATCH 18/18] Improve FIXME comments

---
 Sources/_StringProcessing/ByteCodeGen.swift | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
index ccdc10abf..9b9f02ff7 100644
--- a/Sources/_StringProcessing/ByteCodeGen.swift
+++ b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -143,6 +143,9 @@ fileprivate extension Compiler.ByteCodeGen {
 
     case .startOfLine:
       // FIXME: Anchor.startOfLine must always use this first branch
+      // The behavior of `^` should depend on `anchorsMatchNewlines`, but
+      // the DSL-based `.startOfLine` anchor should always match the start
+      // of a line. Right now we don't distinguish between those anchors.
       if options.anchorsMatchNewlines {
         builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, subjectBounds) in
           if pos == subjectBounds.lowerBound { return true }
@@ -161,6 +164,9 @@ fileprivate extension Compiler.ByteCodeGen {
       
     case .endOfLine:
       // FIXME: Anchor.endOfLine must always use this first branch
+      // The behavior of `$` should depend on `anchorsMatchNewlines`, but
+      // the DSL-based `.endOfLine` anchor should always match the end
+      // of a line. Right now we don't distinguish between those anchors.
       if options.anchorsMatchNewlines {
         builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, subjectBounds) in
           if pos == subjectBounds.upperBound { return true }