diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index b6f9b4732..86309bb8a 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -587,11 +587,11 @@ extension Compiler.ByteCodeGen { try emitConcatenationComponent(child) } - case let .capture(_, refId, child): + case let .capture(name, refId, child): options.beginScope() defer { options.endScope() } - let cap = builder.makeCapture(id: refId) + let cap = builder.makeCapture(id: refId, name: name) switch child { case let .matcher(_, m): emitMatcher(m, into: cap) diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 7cf94f6ef..2b38ace0a 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -45,6 +45,7 @@ extension MEProgram where Input.Element: Hashable { // Symbolic reference resolution var unresolvedReferences: [ReferenceID: [InstructionAddress]] = [:] var referencedCaptureOffsets: [ReferenceID: Int] = [:] + var namedCaptureOffsets: [String: Int] = [:] var captureCount: Int { // We currently deduce the capture count from the capture register number. nextCaptureRegister.rawValue @@ -353,7 +354,8 @@ extension MEProgram.Builder { staticMatcherFunctions: matcherFunctions, registerInfo: regInfo, captureStructure: captureStructure, - referencedCaptureOffsets: referencedCaptureOffsets) + referencedCaptureOffsets: referencedCaptureOffsets, + namedCaptureOffsets: namedCaptureOffsets) } mutating func reset() { self = Self() } @@ -438,7 +440,9 @@ fileprivate extension MEProgram.Builder { // Register helpers extension MEProgram.Builder { - mutating func makeCapture(id: ReferenceID?) -> CaptureRegister { + mutating func makeCapture( + id: ReferenceID?, name: String? 
+ ) -> CaptureRegister { defer { nextCaptureRegister.rawValue += 1 } // Register the capture for later lookup via symbolic references. if let id = id { @@ -446,6 +450,10 @@ extension MEProgram.Builder { captureCount, forKey: id) assert(preexistingValue == nil) } + if let name = name { + // TODO: Reject duplicate capture names unless `(?J)`? + namedCaptureOffsets.updateValue(captureCount, forKey: name) + } return nextCaptureRegister } diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 390af7d66..807598637 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -145,6 +145,7 @@ extension Processor._StoredCapture: CustomStringConvertible { struct CaptureList { var values: Array._StoredCapture> var referencedCaptureOffsets: [ReferenceID: Int] + var namedCaptureOffsets: [String: Int] // func extract(from s: String) -> Array> { // caps.map { $0.map { s[$0] } } diff --git a/Sources/_StringProcessing/Engine/MEProgram.swift b/Sources/_StringProcessing/Engine/MEProgram.swift index b0f2e6a79..0bfa0ecba 100644 --- a/Sources/_StringProcessing/Engine/MEProgram.swift +++ b/Sources/_StringProcessing/Engine/MEProgram.swift @@ -36,6 +36,7 @@ struct MEProgram where Input.Element: Equatable { let captureStructure: CaptureStructure let referencedCaptureOffsets: [ReferenceID: Int] + let namedCaptureOffsets: [String: Int] } extension MEProgram: CustomStringConvertible { diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index c7d4527a5..6ebb93f5c 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -37,7 +37,8 @@ struct Executor { let capList = CaptureList( values: cpu.storedCaptures, - referencedCaptureOffsets: engine.program.referencedCaptureOffsets) + referencedCaptureOffsets: engine.program.referencedCaptureOffsets, + namedCaptureOffsets: 
engine.program.namedCaptureOffsets) let capStruct = engine.program.captureStructure let range = inputRange.lowerBound.. Substring { input[range] } + + public subscript(name: String) -> AnyRegexOutput.Element? { + namedCaptureOffsets[name].map { self[$0 + 1] } + } } /// A type-erased regex output @available(SwiftStdlib 5.7, *) public struct AnyRegexOutput { let input: String + let namedCaptureOffsets: [String: Int] fileprivate let _elements: [ElementRepresentation] /// The underlying representation of the element of a type-erased regex @@ -94,9 +99,12 @@ extension AnyRegexOutput { @available(SwiftStdlib 5.7, *) extension AnyRegexOutput { internal init( - input: String, elements: C + input: String, namedCaptureOffsets: [String: Int], elements: C ) where C.Element == StructuredCapture { - self.init(input: input, _elements: elements.map(ElementRepresentation.init)) + self.init( + input: input, + namedCaptureOffsets: namedCaptureOffsets, + _elements: elements.map(ElementRepresentation.init)) } } @@ -170,6 +178,13 @@ extension AnyRegexOutput: RandomAccessCollection { } } +@available(SwiftStdlib 5.7, *) +extension AnyRegexOutput { + public subscript(name: String) -> Element? { + namedCaptureOffsets[name].map { self[$0 + 1] } + } +} + @available(SwiftStdlib 5.7, *) extension Regex.Match where Output == AnyRegexOutput { /// Creates a type-erased regex match from an existing match. diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift index a86899041..4b2f117e4 100644 --- a/Sources/_StringProcessing/Regex/Match.swift +++ b/Sources/_StringProcessing/Regex/Match.swift @@ -26,6 +26,8 @@ extension Regex { let referencedCaptureOffsets: [ReferenceID: Int] + let namedCaptureOffsets: [String: Int] + let value: Any? 
} } @@ -40,6 +42,7 @@ extension Regex.Match { storedCapture: StoredCapture(range: range, value: nil)) let output = AnyRegexOutput( input: input, + namedCaptureOffsets: namedCaptureOffsets, elements: [wholeMatchAsCapture] + rawCaptures) return output as! Output } else if Output.self == Substring.self { diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 897bca8f7..58f847f32 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -689,7 +689,9 @@ class RegexDSLTests: XCTestCase { } do { let regex = try Regex( - compiling: #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#) + compiling: #""" + (?<lower>[0-9A-F]+)(?:\.\.(?<upper>[0-9A-F]+))?\s+;\s+(?<desc>\w+).* + """#) let line = """ A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM \ COMBINING MARK TUKWENTIS @@ -699,13 +701,16 @@ class RegexDSLTests: XCTestCase { let output = match.output XCTAssertEqual(output[0].substring, line[...]) XCTAssertTrue(output[1].substring == "A6F0") + XCTAssertTrue(output["lower"]?.substring == "A6F0") XCTAssertTrue(output[2].substring == "A6F1") + XCTAssertTrue(output["upper"]?.substring == "A6F1") XCTAssertTrue(output[3].substring == "Extend") + XCTAssertTrue(output["desc"]?.substring == "Extend") let typedOutput = try XCTUnwrap(output.as( - (Substring, Substring, Substring?, Substring).self)) + (Substring, lower: Substring, upper: Substring?, Substring).self)) XCTAssertEqual(typedOutput.0, line[...]) - XCTAssertTrue(typedOutput.1 == "A6F0") - XCTAssertTrue(typedOutput.2 == "A6F1") + XCTAssertTrue(typedOutput.lower == "A6F0") + XCTAssertTrue(typedOutput.upper == "A6F1") XCTAssertTrue(typedOutput.3 == "Extend") } }