From 3f549411cfbf5c7dd5452c7d8375505e2ef03343 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Tue, 3 May 2022 13:59:39 -0700 Subject: [PATCH 1/5] Implement .as for Regex --- .../Utility/TypeConstruction.swift | 103 +++++++----------- .../Regex/AnyRegexOutput.swift | 10 +- .../Utility/TypeVerification.swift | 50 +++++++++ Tests/RegexTests/CaptureTests.swift | 18 ++- Tests/RegexTests/MatchTests.swift | 2 +- 5 files changed, 118 insertions(+), 65 deletions(-) create mode 100644 Sources/_StringProcessing/Utility/TypeVerification.swift diff --git a/Sources/_RegexParser/Utility/TypeConstruction.swift b/Sources/_RegexParser/Utility/TypeConstruction.swift index 524b24917..e368d3513 100644 --- a/Sources/_RegexParser/Utility/TypeConstruction.swift +++ b/Sources/_RegexParser/Utility/TypeConstruction.swift @@ -17,19 +17,7 @@ // const Metadata * const *elements, // const char *labels, // const ValueWitnessTable *proposedWitnesses); -// -// SWIFT_RUNTIME_EXPORT SWIFT_CC(swift) -// MetadataResponse -// swift_getTupleTypeMetadata2(MetadataRequest request, -// const Metadata *elt0, const Metadata *elt1, -// const char *labels, -// const ValueWitnessTable *proposedWitnesses); -// SWIFT_RUNTIME_EXPORT SWIFT_CC(swift) -// MetadataResponse -// swift_getTupleTypeMetadata3(MetadataRequest request, -// const Metadata *elt0, const Metadata *elt1, -// const Metadata *elt2, const char *labels, -// const ValueWitnessTable *proposedWitnesses); + @_silgen_name("swift_getTupleTypeMetadata") private func swift_getTupleTypeMetadata( @@ -40,31 +28,13 @@ private func swift_getTupleTypeMetadata( proposedWitnesses: UnsafeRawPointer? ) -> (value: Any.Type, state: Int) -@_silgen_name("swift_getTupleTypeMetadata2") -private func swift_getTupleTypeMetadata2( - request: Int, - element1: Any.Type, - element2: Any.Type, - labels: UnsafePointer?, - proposedWitnesses: UnsafeRawPointer? 
-) -> (value: Any.Type, state: Int) - -@_silgen_name("swift_getTupleTypeMetadata3") -private func swift_getTupleTypeMetadata3( - request: Int, - element1: Any.Type, - element2: Any.Type, - element3: Any.Type, - labels: UnsafePointer?, - proposedWitnesses: UnsafeRawPointer? -) -> (value: Any.Type, state: Int) - public enum TypeConstruction { /// Returns a tuple metatype of the given element types. public static func tupleType< ElementTypes: BidirectionalCollection >( - of elementTypes: __owned ElementTypes + of elementTypes: __owned ElementTypes, + labels: String? = nil ) -> Any.Type where ElementTypes.Element == Any.Type { // From swift/ABI/Metadata.h: // template @@ -78,39 +48,50 @@ public enum TypeConstruction { let elementCountFlag = 0x0000FFFF assert(elementTypes.count != 1, "A one-element tuple is not a realistic Swift type") assert(elementTypes.count <= elementCountFlag, "Tuple size exceeded \(elementCountFlag)") - switch elementTypes.count { - case 2: - return swift_getTupleTypeMetadata2( - request: 0, - element1: elementTypes[elementTypes.startIndex], - element2: elementTypes[elementTypes.index(elementTypes.startIndex, offsetBy: 1)], - labels: nil, - proposedWitnesses: nil).value - case 3: - return swift_getTupleTypeMetadata3( - request: 0, - element1: elementTypes[elementTypes.startIndex], - element2: elementTypes[elementTypes.index(elementTypes.startIndex, offsetBy: 1)], - element3: elementTypes[elementTypes.index(elementTypes.startIndex, offsetBy: 2)], - labels: nil, - proposedWitnesses: nil).value - default: - let result = elementTypes.withContiguousStorageIfAvailable { elementTypesBuffer in - swift_getTupleTypeMetadata( + + var flags = elementTypes.count + + // If we have labels to provide, then say the label pointer is not constant + // because the lifetime of said pointer will only be valid for the lifetime + // of the 'swift_getTupleTypeMetadata' call. If we don't have labels, then + // our label pointer will be empty and constant. 
+ if labels != nil { + // Has non constant labels + flags |= 0x10000 + } + + let result = elementTypes.withContiguousStorageIfAvailable { elementTypesBuffer in + if let labels = labels { + return labels.withCString { labelsPtr in + swift_getTupleTypeMetadata( + request: 0, + flags: flags, + elements: elementTypesBuffer.baseAddress, + labels: labelsPtr, + proposedWitnesses: nil + ) + } + } else { + return swift_getTupleTypeMetadata( request: 0, - flags: elementTypesBuffer.count, + flags: flags, elements: elementTypesBuffer.baseAddress, labels: nil, - proposedWitnesses: nil).value + proposedWitnesses: nil + ) } - guard let result = result else { - fatalError(""" - The collection of element types does not support an internal representation of - contiguous storage - """) - } - return result } + + guard let result = result else { + fatalError( + """ + The collection of element types does not support an internal representation of + contiguous storage + """ + ) + } + + return result.value } /// Creates a type-erased tuple with the given elements. diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift index 00fc2e952..74e78e0b2 100644 --- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift +++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift @@ -231,7 +231,7 @@ extension Regex where Output == AnyRegexOutput { /// Use this initializer to fit a regex with strongly typed captures into the /// use site of a dynamic regex, i.e. one that was created from a string. public init(_ regex: Regex) { - fatalError("FIXME: Not implemented") + self.init(node: regex.root) } /// Returns a typed regex by converting the underlying types. @@ -242,6 +242,12 @@ extension Regex where Output == AnyRegexOutput { public func `as`( _ type: Output.Type = Output.self ) -> Regex? 
{ - fatalError("FIXME: Not implemented") + let result = Regex(node: root) + + guard result._verifyType() else { + return nil + } + + return result } } diff --git a/Sources/_StringProcessing/Utility/TypeVerification.swift b/Sources/_StringProcessing/Utility/TypeVerification.swift new file mode 100644 index 000000000..6ba491898 --- /dev/null +++ b/Sources/_StringProcessing/Utility/TypeVerification.swift @@ -0,0 +1,50 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +@_implementationOnly import _RegexParser + +@available(SwiftStdlib 5.7, *) +extension Regex { + internal func _verifyType() -> Bool { + var tupleElements: [Any.Type] = [Substring.self] + var labels = " " + + for capture in program.tree.root._captureList.captures { + var captureType: Any.Type = capture.type ?? Substring.self + var i = capture.optionalDepth + + while i != 0 { + captureType = TypeConstruction.optionalType(of: captureType) + i -= 1 + } + + tupleElements.append(captureType) + + if let name = capture.name { + labels += name + } + + labels.unicodeScalars.append(" ") + } + + // If we have no captures, then our Regex must be Regex. 
+ if tupleElements.count == 1 { + return Output.self == Substring.self + } + + let createdType = TypeConstruction.tupleType( + of: tupleElements, + labels: labels + ) + + return Output.self == createdType + } +} diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index b48e1f0a5..81019f3ff 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -451,7 +451,23 @@ extension RegexTests { // TODO: "((a|b)|c)*" } - + + func testTypeVerification() throws { + let opaque1 = try Regex("abc") + let concrete1 = try XCTUnwrap(opaque1.as(Substring.self)) + XCTAssertNil(opaque1.as((Substring, Substring).self)) + XCTAssertNil(opaque1.as(Int.self)) + + let opaque2 = try Regex("(abc)") + let concrete2 = try XCTUnwrap(opaque2.as((Substring, Substring).self)) + XCTAssertNil(opaque2.as(Substring.self)) + XCTAssertNil(opaque2.as((Substring, Int).self)) + + let opaque3 = try Regex("(?abc)") + let concrete3 = try XCTUnwrap(opaque3.as((Substring, someLabel: Substring).self)) + XCTAssertNil(opaque3.as((Substring, Substring).self)) + XCTAssertNil(opaque3.as(Substring.self)) + } } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 2c6b858cc..16b19dd49 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -35,7 +35,7 @@ extension Executor { in: start.. 
Date: Tue, 3 May 2022 13:59:58 -0700 Subject: [PATCH 2/5] Unify Match and AnyRegexOutput --- Sources/_StringProcessing/Capture.swift | 16 +++-- Sources/_StringProcessing/Executor.swift | 15 +++-- .../Regex/AnyRegexOutput.swift | 58 ++++++++----------- Sources/_StringProcessing/Regex/Match.swift | 36 +++++++----- Tests/RegexTests/CaptureTests.swift | 20 +++---- 5 files changed, 75 insertions(+), 70 deletions(-) diff --git a/Sources/_StringProcessing/Capture.swift b/Sources/_StringProcessing/Capture.swift index 51428acee..078aaf127 100644 --- a/Sources/_StringProcessing/Capture.swift +++ b/Sources/_StringProcessing/Capture.swift @@ -61,26 +61,30 @@ func constructExistentialOutputComponent( return underlying } -extension StructuredCapture { +@available(SwiftStdlib 5.7, *) +extension AnyRegexOutput.Element { func existentialOutputComponent( from input: Substring ) -> Any { constructExistentialOutputComponent( from: input, - in: storedCapture?.range, - value: storedCapture?.value, - optionalCount: optionalCount) + in: range, + value: value, + optionalCount: optionalDepth + ) } func slice(from input: String) -> Substring? 
{ - guard let r = storedCapture?.range else { return nil } + guard let r = range else { return nil } return input[r] } } -extension Sequence where Element == StructuredCapture { +@available(SwiftStdlib 5.7, *) +extension Sequence where Element == AnyRegexOutput.Element { // FIXME: This is a stop gap where we still slice the input // and traffic through existentials + @available(SwiftStdlib 5.7, *) func existentialOutput( from input: Substring ) -> Any { diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index e44b110e5..d66f88849 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -55,14 +55,19 @@ struct Executor { } else { value = nil } - - return .init( + + let anyRegexOutput = AnyRegexOutput( input: input, + namedCaptureOffsets: capList.namedCaptureOffsets, + elements: caps + ) + + return .init( + anyRegexOutput: anyRegexOutput, range: range, - rawCaptures: caps, referencedCaptureOffsets: capList.referencedCaptureOffsets, - namedCaptureOffsets: capList.namedCaptureOffsets, - value: value) + value: value + ) } @available(SwiftStdlib 5.7, *) diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift index 74e78e0b2..f19316fd8 100644 --- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift +++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift @@ -41,11 +41,11 @@ extension Regex.Match where Output == AnyRegexOutput { public subscript( dynamicMember keyPath: KeyPath<(Substring, _doNotUse: ()), Substring> ) -> Substring { - input[range] + anyRegexOutput.input[range] } public subscript(name: String) -> AnyRegexOutput.Element? 
{ - namedCaptureOffsets[name].map { self[$0 + 1] } + anyRegexOutput.namedCaptureOffsets[name].map { self[$0 + 1] } } } @@ -54,17 +54,19 @@ extension Regex.Match where Output == AnyRegexOutput { public struct AnyRegexOutput { let input: String let namedCaptureOffsets: [String: Int] - fileprivate let _elements: [ElementRepresentation] + let _elements: [ElementRepresentation] /// The underlying representation of the element of a type-erased regex /// output. - fileprivate struct ElementRepresentation { + internal struct ElementRepresentation { /// The depth of `Optioals`s wrapping the underlying value. For example, /// `Substring` has optional depth `0`, and `Int??` has optional depth `2`. let optionalDepth: Int /// The bounds of the output element. let bounds: Range? + /// If the output vaule is strongly typed, then this will be set. + var value: Any? = nil } } @@ -75,14 +77,7 @@ extension AnyRegexOutput { /// Use this initializer to fit a regex with strongly typed captures into the /// use site of a dynamic regex, like one that was created from a string. public init(_ match: Regex.Match) { - // Note: We use type equality instead of `match.output as? ...` to prevent - // unexpected optional flattening. - if Output.self == AnyRegexOutput.self { - self = match.output as! AnyRegexOutput - return - } - fatalError("FIXME: Not implemented") - // self.init(input: match.input, _elements: ) + self = match.anyRegexOutput } /// Returns a typed output by converting the underlying value to the specified @@ -92,11 +87,8 @@ extension AnyRegexOutput { /// - Returns: The output, if the underlying value can be converted to the /// output type; otherwise `nil`. public func `as`(_ type: Output.Type = Output.self) -> Output? 
{ - let elements = _elements.map { - StructuredCapture( - optionalCount: $0.optionalDepth, - storedCapture: .init(range: $0.bounds) - ).existentialOutputComponent(from: input[...]) + let elements = map { + $0.existentialOutputComponent(from: input[...]) } return TypeConstruction.tuple(of: elements) as? Output } @@ -110,7 +102,8 @@ extension AnyRegexOutput { self.init( input: input, namedCaptureOffsets: namedCaptureOffsets, - _elements: elements.map(ElementRepresentation.init)) + _elements: elements.map(ElementRepresentation.init) + ) } } @@ -119,7 +112,9 @@ extension AnyRegexOutput.ElementRepresentation { init(_ element: StructuredCapture) { self.init( optionalDepth: element.optionalCount, - bounds: element.storedCapture.flatMap(\.range)) + bounds: element.storedCapture.flatMap(\.range), + value: element.storedCapture.flatMap(\.value) + ) } func value(forInput input: String) -> Any { @@ -142,6 +137,10 @@ extension AnyRegexOutput: RandomAccessCollection { public struct Element { fileprivate let representation: ElementRepresentation let input: String + + var optionalDepth: Int { + representation.optionalDepth + } /// The range over which a value was captured. `nil` for no-capture. public var range: Range? { @@ -155,7 +154,7 @@ extension AnyRegexOutput: RandomAccessCollection { /// The captured value, `nil` for no-capture public var value: Any? { - fatalError() + representation.value } } @@ -198,19 +197,12 @@ extension Regex.Match where Output == AnyRegexOutput { /// Use this initializer to fit a regex match with strongly typed captures into the /// use site of a dynamic regex match, like one that was created from a string. public init(_ match: Regex.Match) { - fatalError("FIXME: Not implemented") - } - - /// Returns a typed match by converting the underlying values to the specified - /// types. - /// - /// - Parameter type: The expected output type. 
- /// - Returns: A match generic over the output type, if the underlying values - /// can be converted to the output type; otherwise, `nil`. - public func `as`( - _ type: Output.Type = Output.self - ) -> Regex.Match? { - fatalError("FIXME: Not implemented") + self.init( + anyRegexOutput: match.anyRegexOutput, + range: match.range, + referencedCaptureOffsets: match.referencedCaptureOffsets, + value: match.value + ) } } diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift index 8172e993b..5b4c29c1e 100644 --- a/Sources/_StringProcessing/Regex/Match.swift +++ b/Sources/_StringProcessing/Regex/Match.swift @@ -17,17 +17,13 @@ extension Regex { /// providing direct access to captures. @dynamicMemberLookup public struct Match { - let input: String + let anyRegexOutput: AnyRegexOutput /// The range of the overall match. public let range: Range - let rawCaptures: [StructuredCapture] - let referencedCaptureOffsets: [ReferenceID: Int] - let namedCaptureOffsets: [String: Int] - let value: Any? } } @@ -37,18 +33,23 @@ extension Regex.Match { /// The output produced from the match operation. public var output: Output { if Output.self == AnyRegexOutput.self { - let wholeMatchAsCapture = StructuredCapture( - optionalCount: 0, - storedCapture: StoredCapture(range: range, value: nil)) + let wholeMatchCapture = AnyRegexOutput.ElementRepresentation( + optionalDepth: 0, + bounds: range, + value: nil + ) + let output = AnyRegexOutput( - input: input, - namedCaptureOffsets: namedCaptureOffsets, - elements: [wholeMatchAsCapture] + rawCaptures) + input: anyRegexOutput.input, + namedCaptureOffsets: anyRegexOutput.namedCaptureOffsets, + _elements: [wholeMatchCapture] + anyRegexOutput._elements + ) + return output as! Output } else if Output.self == Substring.self { // FIXME: Plumb whole match (`.0`) through the matching engine. - return input[range] as! 
Output - } else if rawCaptures.isEmpty, value != nil { + return anyRegexOutput.input[range] as! Output + } else if anyRegexOutput.isEmpty, value != nil { // FIXME: This is a workaround for whole-match values not // being modeled as part of captures. We might want to // switch to a model where results are alongside captures @@ -57,7 +58,9 @@ extension Regex.Match { guard value == nil else { fatalError("FIXME: what would this mean?") } - let typeErasedMatch = rawCaptures.existentialOutput(from: input[range]) + let typeErasedMatch = anyRegexOutput.existentialOutput( + from: anyRegexOutput.input[range] + ) return typeErasedMatch as! Output } } @@ -81,8 +84,9 @@ extension Regex.Match { preconditionFailure( "Reference did not capture any match in the regex") } - return rawCaptures[offset].existentialOutputComponent(from: input[...]) - as! Capture + return anyRegexOutput[offset].existentialOutputComponent( + from: anyRegexOutput.input[...] + ) as! Capture } } diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index 81019f3ff..8a9423544 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -48,20 +48,20 @@ extension CaptureList { } } -extension StructuredCapture { +extension AnyRegexOutput.Element { func formatStringCapture(input: String) -> String { - var res = String(repeating: "some(", count: someCount) - if let r = self.storedCapture?.range { + var res = String(repeating: "some(", count: optionalDepth) + if let r = range { res += input[r] } else { res += "none" } - res += String(repeating: ")", count: someCount) + res += String(repeating: ")", count: optionalDepth) return res } } -extension Sequence where Element == StructuredCapture { +extension AnyRegexOutput { func formatStringCaptures(input: String) -> String { var res = "[" res += self.map { @@ -111,13 +111,13 @@ extension StringCapture: CustomStringConvertible { extension StringCapture { func isEqual( - to structCap: StructuredCapture, + 
to structCap: AnyRegexOutput.Element, in input: String ) -> Bool { - guard optionalCount == structCap.optionalCount else { + guard optionalCount == structCap.optionalDepth else { return false } - guard let r = structCap.storedCapture?.range else { + guard let r = structCap.range else { return contents == nil } guard let s = contents else { @@ -194,7 +194,7 @@ func captureTest( return } - let caps = result.rawCaptures + let caps = result.anyRegexOutput guard caps.count == output.count else { XCTFail(""" Mismatch capture count: @@ -205,7 +205,7 @@ func captureTest( """) continue } - + guard output.elementsEqual(caps, by: { $0.isEqual(to: $1, in: input) }) else { From a53a40b67f22ec746e15cd05bdba1a45d6c7bb06 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Tue, 10 May 2022 16:13:39 -0700 Subject: [PATCH 3/5] Remove the namedCaptureOffset and StructuredCapture --- Sources/_StringProcessing/Capture.swift | 21 ----------- .../Engine/Structuralize.swift | 27 ++++++++------ Sources/_StringProcessing/Executor.swift | 3 +- .../Regex/AnyRegexOutput.swift | 37 ++++++++++--------- Sources/_StringProcessing/Regex/Match.swift | 1 - .../Utility/TypeVerification.swift | 5 ++- Tests/RegexTests/CaptureTests.swift | 6 +-- 7 files changed, 43 insertions(+), 57 deletions(-) diff --git a/Sources/_StringProcessing/Capture.swift b/Sources/_StringProcessing/Capture.swift index 078aaf127..fe00bdc0f 100644 --- a/Sources/_StringProcessing/Capture.swift +++ b/Sources/_StringProcessing/Capture.swift @@ -11,27 +11,6 @@ @_implementationOnly import _RegexParser -/// A structured capture -struct StructuredCapture { - /// The `.optional` height of the result - var optionalCount = 0 - - var storedCapture: StoredCapture? - - var someCount: Int { - storedCapture == nil ? optionalCount - 1 : optionalCount - } -} - -/// A storage form for a successful capture -struct StoredCapture { - // TODO: drop optional when engine tracks all ranges - var range: Range? 
- - // If strongly typed, value is set - var value: Any? = nil -} - // TODO: Where should this live? Inside TypeConstruction? func constructExistentialOutputComponent( from input: Substring, diff --git a/Sources/_StringProcessing/Engine/Structuralize.swift b/Sources/_StringProcessing/Engine/Structuralize.swift index a8cfeb20c..e7177a152 100644 --- a/Sources/_StringProcessing/Engine/Structuralize.swift +++ b/Sources/_StringProcessing/Engine/Structuralize.swift @@ -1,20 +1,25 @@ @_implementationOnly import _RegexParser - extension CaptureList { - func structuralize( + @available(SwiftStdlib 5.7, *) + func createElements( _ list: MECaptureList, _ input: String - ) -> [StructuredCapture] { + ) -> [AnyRegexOutput.ElementRepresentation] { assert(list.values.count == captures.count) - - var result = [StructuredCapture]() - for (cap, meStored) in zip(self.captures, list.values) { - let stored = StoredCapture( - range: meStored.latest, value: meStored.latestValue) - - result.append(.init( - optionalCount: cap.optionalDepth, storedCapture: stored)) + + var result = [AnyRegexOutput.ElementRepresentation]() + + for (cap, meStored) in zip(captures, list.values) { + let element = AnyRegexOutput.ElementRepresentation( + optionalDepth: cap.optionalDepth, + bounds: meStored.latest, + name: cap.name, + value: meStored.latestValue + ) + + result.append(element) } + return result } } diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index d66f88849..391ffb0b7 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -41,7 +41,7 @@ struct Executor { namedCaptureOffsets: engine.program.namedCaptureOffsets) let range = inputRange.lowerBound.. AnyRegexOutput.Element? 
{ - anyRegexOutput.namedCaptureOffsets[name].map { self[$0 + 1] } + anyRegexOutput.first { + $0.name == name + } } } @@ -53,7 +55,6 @@ extension Regex.Match where Output == AnyRegexOutput { @available(SwiftStdlib 5.7, *) public struct AnyRegexOutput { let input: String - let namedCaptureOffsets: [String: Int] let _elements: [ElementRepresentation] /// The underlying representation of the element of a type-erased regex @@ -65,6 +66,10 @@ public struct AnyRegexOutput { /// The bounds of the output element. let bounds: Range? + + /// The name of the capture. + var name: String? = nil + /// If the output vaule is strongly typed, then this will be set. var value: Any? = nil } @@ -96,27 +101,16 @@ extension AnyRegexOutput { @available(SwiftStdlib 5.7, *) extension AnyRegexOutput { - internal init( - input: String, namedCaptureOffsets: [String: Int], elements: C - ) where C.Element == StructuredCapture { + internal init(input: String, elements: [ElementRepresentation]) { self.init( input: input, - namedCaptureOffsets: namedCaptureOffsets, - _elements: elements.map(ElementRepresentation.init) + _elements: elements ) } } @available(SwiftStdlib 5.7, *) extension AnyRegexOutput.ElementRepresentation { - init(_ element: StructuredCapture) { - self.init( - optionalDepth: element.optionalCount, - bounds: element.storedCapture.flatMap(\.range), - value: element.storedCapture.flatMap(\.value) - ) - } - func value(forInput input: String) -> Any { // Ok for now because `existentialMatchComponent` // wont slice the input if there's no range to slice with @@ -128,7 +122,8 @@ extension AnyRegexOutput.ElementRepresentation { from: input, in: bounds, value: nil, - optionalCount: optionalDepth) + optionalCount: optionalDepth + ) } } @@ -141,7 +136,11 @@ extension AnyRegexOutput: RandomAccessCollection { var optionalDepth: Int { representation.optionalDepth } - + + var name: String? { + representation.name + } + /// The range over which a value was captured. `nil` for no-capture. 
public var range: Range? { representation.bounds @@ -186,7 +185,9 @@ extension AnyRegexOutput: RandomAccessCollection { @available(SwiftStdlib 5.7, *) extension AnyRegexOutput { public subscript(name: String) -> Element? { - namedCaptureOffsets[name].map { self[$0 + 1] } + first { + $0.name == name + } } } diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift index 5b4c29c1e..950a23e84 100644 --- a/Sources/_StringProcessing/Regex/Match.swift +++ b/Sources/_StringProcessing/Regex/Match.swift @@ -41,7 +41,6 @@ extension Regex.Match { let output = AnyRegexOutput( input: anyRegexOutput.input, - namedCaptureOffsets: anyRegexOutput.namedCaptureOffsets, _elements: [wholeMatchCapture] + anyRegexOutput._elements ) diff --git a/Sources/_StringProcessing/Utility/TypeVerification.swift b/Sources/_StringProcessing/Utility/TypeVerification.swift index 6ba491898..df0b59f2c 100644 --- a/Sources/_StringProcessing/Utility/TypeVerification.swift +++ b/Sources/_StringProcessing/Utility/TypeVerification.swift @@ -42,7 +42,10 @@ extension Regex { let createdType = TypeConstruction.tupleType( of: tupleElements, - labels: labels + + // If all of our labels are spaces, that means no actual label was added + // to the tuple. In that case, don't pass a label string. + labels: labels.all { $0 == " " } ? 
nil : labels ) return Output.self == createdType diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index 8a9423544..9d0c03a7e 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -454,17 +454,17 @@ extension RegexTests { func testTypeVerification() throws { let opaque1 = try Regex("abc") - let concrete1 = try XCTUnwrap(opaque1.as(Substring.self)) + _ = try XCTUnwrap(opaque1.as(Substring.self)) XCTAssertNil(opaque1.as((Substring, Substring).self)) XCTAssertNil(opaque1.as(Int.self)) let opaque2 = try Regex("(abc)") - let concrete2 = try XCTUnwrap(opaque2.as((Substring, Substring).self)) + _ = try XCTUnwrap(opaque2.as((Substring, Substring).self)) XCTAssertNil(opaque2.as(Substring.self)) XCTAssertNil(opaque2.as((Substring, Int).self)) let opaque3 = try Regex("(?abc)") - let concrete3 = try XCTUnwrap(opaque3.as((Substring, someLabel: Substring).self)) + _ = try XCTUnwrap(opaque3.as((Substring, someLabel: Substring).self)) XCTAssertNil(opaque3.as((Substring, Substring).self)) XCTAssertNil(opaque3.as(Substring.self)) } From 21f7910420d02cdda3bd22ed557b79f9c0af5211 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Thu, 12 May 2022 08:46:24 -0700 Subject: [PATCH 4/5] Subsume referencedCaptureOffsets --- .../_StringProcessing/Engine/Structuralize.swift | 4 +++- Sources/_StringProcessing/Executor.swift | 1 - .../_StringProcessing/Regex/AnyRegexOutput.swift | 10 ++++++++-- Sources/_StringProcessing/Regex/Match.swift | 15 +++++++-------- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/Sources/_StringProcessing/Engine/Structuralize.swift b/Sources/_StringProcessing/Engine/Structuralize.swift index e7177a152..129ac1677 100644 --- a/Sources/_StringProcessing/Engine/Structuralize.swift +++ b/Sources/_StringProcessing/Engine/Structuralize.swift @@ -1,4 +1,5 @@ @_implementationOnly import _RegexParser + extension CaptureList { @available(SwiftStdlib 5.7, *) func createElements( @@ -9,11 
+10,12 @@ extension CaptureList { var result = [AnyRegexOutput.ElementRepresentation]() - for (cap, meStored) in zip(captures, list.values) { + for (i, (cap, meStored)) in zip(captures, list.values).enumerated() { let element = AnyRegexOutput.ElementRepresentation( optionalDepth: cap.optionalDepth, bounds: meStored.latest, name: cap.name, + referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key, value: meStored.latestValue ) diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 391ffb0b7..532a41256 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -64,7 +64,6 @@ struct Executor { return .init( anyRegexOutput: anyRegexOutput, range: range, - referencedCaptureOffsets: capList.referencedCaptureOffsets, value: value ) } diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift index fae93b719..a3372e0db 100644 --- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift +++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift @@ -70,6 +70,9 @@ public struct AnyRegexOutput { /// The name of the capture. var name: String? = nil + /// The capture reference this element refers to. + var referenceID: ReferenceID? = nil + /// If the output vaule is strongly typed, then this will be set. var value: Any? = nil } @@ -145,7 +148,11 @@ extension AnyRegexOutput: RandomAccessCollection { public var range: Range? { representation.bounds } - + + var referenceID: ReferenceID? { + representation.referenceID + } + /// The slice of the input over which a value was captured. `nil` for no-capture. public var substring: Substring? 
{ range.map { input[$0] } @@ -201,7 +208,6 @@ extension Regex.Match where Output == AnyRegexOutput { self.init( anyRegexOutput: match.anyRegexOutput, range: match.range, - referencedCaptureOffsets: match.referencedCaptureOffsets, value: match.value ) } diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift index 950a23e84..6c5100915 100644 --- a/Sources/_StringProcessing/Regex/Match.swift +++ b/Sources/_StringProcessing/Regex/Match.swift @@ -22,8 +22,6 @@ extension Regex { /// The range of the overall match. public let range: Range - let referencedCaptureOffsets: [ReferenceID: Int] - let value: Any? } } @@ -35,8 +33,7 @@ extension Regex.Match { if Output.self == AnyRegexOutput.self { let wholeMatchCapture = AnyRegexOutput.ElementRepresentation( optionalDepth: 0, - bounds: range, - value: nil + bounds: range ) let output = AnyRegexOutput( @@ -79,11 +76,13 @@ extension Regex.Match { @_spi(RegexBuilder) public subscript(_ id: ReferenceID) -> Capture { - guard let offset = referencedCaptureOffsets[id] else { - preconditionFailure( - "Reference did not capture any match in the regex") + guard let element = anyRegexOutput.first( + where: { $0.referenceID == id } + ) else { + preconditionFailure("Reference did not capture any match in the regex") } - return anyRegexOutput[offset].existentialOutputComponent( + + return element.existentialOutputComponent( from: anyRegexOutput.input[...] ) as! 
Capture } From c7b70a4c42cd8a35eb380af00627d44e85b27fe0 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Thu, 12 May 2022 08:55:36 -0700 Subject: [PATCH 5/5] Add optional tests --- Tests/RegexTests/CaptureTests.swift | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index 9d0c03a7e..ece5347c2 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -467,6 +467,16 @@ extension RegexTests { _ = try XCTUnwrap(opaque3.as((Substring, someLabel: Substring).self)) XCTAssertNil(opaque3.as((Substring, Substring).self)) XCTAssertNil(opaque3.as(Substring.self)) + + let opaque4 = try Regex("(?abc)?") + _ = try XCTUnwrap(opaque4.as((Substring, somethingHere: Substring?).self)) + XCTAssertNil(opaque4.as((Substring, somethignHere: Substring).self)) + XCTAssertNil(opaque4.as((Substring, Substring?).self)) + + let opaque5 = try Regex("((a)?bc)?") + _ = try XCTUnwrap(opaque5.as((Substring, Substring?, Substring??).self)) + XCTAssertNil(opaque5.as((Substring, somethingHere: Substring?, here: Substring??).self)) + XCTAssertNil(opaque5.as((Substring, Substring?, Substring?).self)) } }