From f465def9b367f483fdf2bdb632ce776c8d2426a8 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 5 Jul 2022 22:16:04 +0100 Subject: [PATCH 1/6] Remove DSLTree.Node.regexLiteral This doesn't appear to be used. --- Sources/_StringProcessing/ByteCodeGen.swift | 3 --- Sources/_StringProcessing/ConsumerInterface.swift | 4 ---- Sources/_StringProcessing/PrintAsPattern.swift | 3 --- Sources/_StringProcessing/Regex/DSLTree.swift | 15 +++------------ 4 files changed, 3 insertions(+), 22 deletions(-) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index d43a3a3f4..d18d50aa0 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -812,9 +812,6 @@ fileprivate extension Compiler.ByteCodeGen { } } - case let .regexLiteral(l): - return try emitNode(l.ast.dslTreeNode) - case let .convertedRegexLiteral(n, _): return try emitNode(n) diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index f95665288..dbb324b67 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -29,10 +29,6 @@ extension DSLTree.Node { // TODO: Should we handle this here? return nil - case .regexLiteral: - fatalError( - "unreachable: We should only ask atoms") - case let .convertedRegexLiteral(n, _): return try n.generateConsumer(opts) diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index b29053e14..4237eda33 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -258,9 +258,6 @@ extension PrettyPrinter { case let .quotedLiteral(v): print(v._quoted) - case .regexLiteral: - printBackoff(node) - case let .convertedRegexLiteral(n, _): // FIXME: This recursion coordinates with back-off // check above, so it should work out. Need a diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 0714c5d2c..f42f93c32 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -71,9 +71,6 @@ extension DSLTree { case quotedLiteral(String) - /// An embedded literal. - case regexLiteral(_AST.ASTNode) - // TODO: What should we do here? /// /// TODO: Consider splitting off expression functions, or have our own kind @@ -343,7 +340,7 @@ extension DSLTree.Node { case let .conditional(_, t, f): return [t,f] - case .trivia, .empty, .quotedLiteral, .regexLiteral, + case .trivia, .empty, .quotedLiteral, .consumer, .matcher, .characterPredicate, .customCharacterClass, .atom: return [] @@ -357,7 +354,6 @@ extension DSLTree.Node { extension DSLTree.Node { var astNode: AST.Node? { switch self { - case let .regexLiteral(literal): return literal.ast case let .convertedRegexLiteral(_, literal): return literal.ast default: return nil } @@ -391,8 +387,6 @@ extension DSLTree.Node { switch self { case .capture: return true - case let .regexLiteral(re): - return re.ast.hasCapture case let .convertedRegexLiteral(n, re): assert(n.hasCapture == re.ast.hasCapture) return n.hasCapture @@ -603,9 +597,6 @@ extension DSLTree.Node { } child._addCaptures(to: &list, optionalNesting: optNesting) - case let .regexLiteral(re): - return re.ast._addCaptures(to: &list, optionalNesting: nesting) - case let .absentFunction(abs): switch abs.ast.kind { case .expression(_, _, let child): @@ -634,7 +625,7 @@ extension DSLTree.Node { return true case .orderedChoice, .concatenation, .capture, .conditional, .quantification, .customCharacterClass, .atom, - .trivia, .empty, .quotedLiteral, .regexLiteral, .absentFunction, + .trivia, .empty, .quotedLiteral, .absentFunction, .convertedRegexLiteral, .consumer, .characterPredicate, .matcher: return false @@ -693,7 +684,7 @@ extension DSLTree { case let .conditional(_, t, f): return [_Tree(t), _Tree(f)] - case .trivia, .empty, .quotedLiteral, .regexLiteral, + case .trivia, .empty, .quotedLiteral, .consumer, .matcher, .characterPredicate, .customCharacterClass, .atom: return [] From 363af4d8d3494ea234f233c3eb589309f1d14e7f Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 5 Jul 2022 22:16:04 +0100 Subject: [PATCH 2/6] Use `nil` instead of `.some(nil)` for failed captures Previously we would wrap a `nil` in `optionalCount - 1` outer layers of `.some(...)`. Change this to return an `optionalCount` nested optional with a top-level value of `nil`. --- Sources/_StringProcessing/Capture.swift | 27 +++++++++++---------- Tests/RegexBuilderTests/RegexDSLTests.swift | 21 ++++++++++++++++ 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/Sources/_StringProcessing/Capture.swift b/Sources/_StringProcessing/Capture.swift index a8d663651..b75d01392 100644 --- a/Sources/_StringProcessing/Capture.swift +++ b/Sources/_StringProcessing/Capture.swift @@ -17,23 +17,24 @@ func constructExistentialOutputComponent( component: (range: Range, value: Any?)?, optionalCount: Int ) -> Any { - let someCount: Int - var underlying: Any if let component = component { - underlying = component.value ?? input[component.range] - someCount = optionalCount + var underlying = component.value ?? input[component.range] + for _ in 0 ..< optionalCount { + func wrap(_ x: T) { + underlying = Optional(x) as Any + } + _openExistential(underlying, do: wrap) + } + return underlying } else { - // Ok since we Any-box every step up the ladder - underlying = Optional(nil) as Any - someCount = optionalCount - 1 - } - for _ in 0..(_ x: T) { - underlying = Optional(x) as Any + precondition(optionalCount > 0, "Must have optional type") + func makeNil(_ x: T.Type) -> Any { + T?.none as Any } - _openExistential(underlying, do: wrap) + let underlyingTy = TypeConstruction.optionalType( + of: Substring.self, depth: optionalCount - 1) + return _openExistential(underlyingTy, do: makeNil) } - return underlying } @available(SwiftStdlib 5.7, *) diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 6af10ff68..88397d346 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -459,6 +459,8 @@ class RegexDSLTests: XCTestCase { try _testDSLCaptures( ("abcdef2", ("abcdef2", "f")), + ("2", ("2", nil)), + ("", ("", nil)), matchType: (Substring, Substring??).self, ==) { Optionally { @@ -1260,6 +1262,25 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(try replace("{bar}"), "foo") } + + func testOptionalNesting() throws { + let r = Regex { + Optionally { + Optionally { + Capture { + "a" + } + } + } + } + if let _ = try r.wholeMatch(in: "")!.output.1 { + XCTFail("Unexpected capture match") + } + if let _ = try r.wholeMatch(in: "a")!.output.1 {} + else { + XCTFail("Expected to match capture") + } + } } extension Unicode.Scalar { From 79f93a3f902b6d34a6fc1e3bd6b77cea99972455 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 5 Jul 2022 22:16:04 +0100 Subject: [PATCH 3/6] Statically enforce type equivalence for `_testDSLCaptures` --- Tests/RegexBuilderTests/RegexDSLTests.swift | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 88397d346..60e92f0ba 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -22,7 +22,7 @@ class RegexDSLTests: XCTestCase { file: StaticString = #file, line: UInt = #line, @RegexComponentBuilder _ content: () -> Content - ) throws { + ) throws where Content.RegexOutput == MatchType { let regex = content() for (input, maybeExpectedCaptures) in tests { let maybeMatch = input.wholeMatch(of: regex) @@ -44,13 +44,7 @@ class RegexDSLTests: XCTestCase { XCTFail("Unexpectedly matched", file: file, line: line) continue } - XCTAssertTrue( - type(of: regex).RegexOutput.self == MatchType.self, - """ - Expected match type: \(MatchType.self) - Actual match type: \(type(of: regex).RegexOutput.self) - """) - let captures = try XCTUnwrap(match.output as? MatchType, file: file, line: line) + let captures = match.output XCTAssertTrue( equivalence(captures, expectedCaptures), "'\(captures)' is not equal to the expected '\(expectedCaptures)'.", From 1519ef4552e57191d51b83f905dcf43d3e815628 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 5 Jul 2022 22:16:05 +0100 Subject: [PATCH 4/6] Flatten optional nesting for regex literal captures When computing the CaptureList for AST nodes, including converted AST -> DSL nodes, only permit at most one level of optionality. This means that regex literal captures are now either `Substring` or `Substring?`. Optional nesting is however still performed in the DSL (due to result builder limitations). If a regex literal is nested in the DSL, it may only add at most one extra level of optionality to the current nesting level. --- .../Regex/Parse/CaptureList.swift | 93 ++++++++---- Sources/_StringProcessing/Regex/DSLTree.swift | 66 +++++---- Tests/RegexBuilderTests/RegexDSLTests.swift | 82 +++++++++++ Tests/RegexTests/CaptureTests.swift | 134 +++++++++--------- 4 files changed, 252 insertions(+), 123 deletions(-) diff --git a/Sources/_RegexParser/Regex/Parse/CaptureList.swift b/Sources/_RegexParser/Regex/Parse/CaptureList.swift index 509fbf9bc..5c448dc5a 100644 --- a/Sources/_RegexParser/Regex/Parse/CaptureList.swift +++ b/Sources/_RegexParser/Regex/Parse/CaptureList.swift @@ -57,63 +57,105 @@ extension CaptureList { } } +extension CaptureList { + public struct Builder { + public var captures = CaptureList() + + public init() {} + + public struct OptionalNesting { + // We maintain two depths, inner and outer. These allow e.g the nesting + // of a regex literal in a DSL, where outside of the scope of the literal, + // nesting is allowed, but inside the literal at most one extra layer of + // optionality may be added. + public var outerDepth: Int + public var canNest: Bool + public var innerDepth: Int + + internal init(outerDepth: Int, canNest: Bool) { + self.outerDepth = outerDepth + self.canNest = canNest + self.innerDepth = 0 + } + + public init(canNest: Bool) { + self.init(outerDepth: 0, canNest: canNest) + } + + public var depth: Int { outerDepth + innerDepth } + + public var disablingNesting: Self { + // If we are currently able to nest, store the current depth as the + // outer depth, and disable nesting for an inner scope. + guard canNest else { return self } + return .init(outerDepth: depth, canNest: false) + } + + public var addingOptional: Self { + var result = self + result.innerDepth = canNest ? innerDepth + 1 : 1 + return result + } + } + } +} + // MARK: Generating from AST -extension AST.Node { - public func _addCaptures( - to list: inout CaptureList, - optionalNesting nesting: Int +extension CaptureList.Builder { + public mutating func addCaptures( + of node: AST.Node, optionalNesting nesting: OptionalNesting ) { - let addOptional = nesting+1 - switch self { + switch node { case let .alternation(a): for child in a.children { - child._addCaptures(to: &list, optionalNesting: addOptional) + addCaptures(of: child, optionalNesting: nesting.addingOptional) } case let .concatenation(c): for child in c.children { - child._addCaptures(to: &list, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting) } case let .group(g): switch g.kind.value { case .capture: - list.append(.init(optionalDepth: nesting, g.location)) + captures.append(.init(optionalDepth: nesting.depth, g.location)) case .namedCapture(let name): - list.append(.init(name: name.value, optionalDepth: nesting, g.location)) + captures.append(.init( + name: name.value, optionalDepth: nesting.depth, g.location)) case .balancedCapture(let b): - list.append(.init(name: b.name?.value, optionalDepth: nesting, - g.location)) + captures.append(.init( + name: b.name?.value, optionalDepth: nesting.depth, g.location)) default: break } - g.child._addCaptures(to: &list, optionalNesting: nesting) + addCaptures(of: g.child, optionalNesting: nesting) case .conditional(let c): switch c.condition.kind { case .group(let g): - AST.Node.group(g)._addCaptures(to: &list, optionalNesting: nesting) + addCaptures(of: .group(g), optionalNesting: nesting) default: break } - c.trueBranch._addCaptures(to: &list, optionalNesting: addOptional) - c.falseBranch._addCaptures(to: &list, optionalNesting: addOptional) + addCaptures(of: c.trueBranch, optionalNesting: nesting.addingOptional) + addCaptures(of: c.falseBranch, optionalNesting: nesting.addingOptional) case .quantification(let q): var optNesting = nesting if q.amount.value.bounds.atLeast == 0 { - optNesting += 1 + optNesting = optNesting.addingOptional } - q.child._addCaptures(to: &list, optionalNesting: optNesting) + addCaptures(of: q.child, optionalNesting: optNesting) case .absentFunction(let abs): switch abs.kind { case .expression(_, _, let child): - child._addCaptures(to: &list, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting) case .clearer, .repeater, .stopper: break } @@ -122,16 +164,17 @@ extension AST.Node { break } } + public static func build(_ ast: AST) -> CaptureList { + var builder = Self() + builder.captures.append(.init(optionalDepth: 0, .fake)) + builder.addCaptures(of: ast.root, optionalNesting: .init(canNest: false)) + return builder.captures + } } extension AST { /// The capture list (including the whole match) of this AST. - public var captureList: CaptureList { - var caps = CaptureList() - caps.append(.init(optionalDepth: 0, .fake)) - root._addCaptures(to: &caps, optionalNesting: 0) - return caps - } + public var captureList: CaptureList { .Builder.build(self) } } // MARK: Convenience for testing and inspection diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index f42f93c32..740bdcb8d 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -545,68 +545,62 @@ struct CaptureTransform: Hashable, CustomStringConvertible { } } -// MARK: AST wrapper types -// -// These wrapper types are required because even @_spi-marked public APIs can't -// include symbols from implementation-only dependencies. - -extension DSLTree.Node { - func _addCaptures( - to list: inout CaptureList, - optionalNesting nesting: Int +extension CaptureList.Builder { + mutating func addCaptures( + of node: DSLTree.Node, optionalNesting nesting: OptionalNesting ) { - let addOptional = nesting+1 - switch self { + switch node { case let .orderedChoice(children): for child in children { - child._addCaptures(to: &list, optionalNesting: addOptional) + addCaptures(of: child, optionalNesting: nesting.addingOptional) } case let .concatenation(children): for child in children { - child._addCaptures(to: &list, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting) } case let .capture(name, _, child, transform): - list.append(.init( + captures.append(.init( name: name, type: transform?.resultType ?? child.wholeMatchType, - optionalDepth: nesting, .fake)) - child._addCaptures(to: &list, optionalNesting: nesting) + optionalDepth: nesting.depth, .fake)) + addCaptures(of: child, optionalNesting: nesting) case let .nonCapturingGroup(kind, child): assert(!kind.ast.isCapturing) - child._addCaptures(to: &list, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting) case let .conditional(cond, trueBranch, falseBranch): switch cond.ast { case .group(let g): - AST.Node.group(g)._addCaptures(to: &list, optionalNesting: nesting) + addCaptures(of: .group(g), optionalNesting: nesting) default: break } - trueBranch._addCaptures(to: &list, optionalNesting: addOptional) - falseBranch._addCaptures(to: &list, optionalNesting: addOptional) - + addCaptures(of: trueBranch, optionalNesting: nesting.addingOptional) + addCaptures(of: falseBranch, optionalNesting: nesting.addingOptional) case let .quantification(amount, _, child): var optNesting = nesting if amount.ast.bounds.atLeast == 0 { - optNesting += 1 + optNesting = optNesting.addingOptional } - child._addCaptures(to: &list, optionalNesting: optNesting) + addCaptures(of: child, optionalNesting: optNesting) case let .absentFunction(abs): switch abs.ast.kind { case .expression(_, _, let child): - child._addCaptures(to: &list, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting) case .clearer, .repeater, .stopper: break } case let .convertedRegexLiteral(n, _): - return n._addCaptures(to: &list, optionalNesting: nesting) + // We disable nesting for converted AST trees, as literals do not nest + // captures. This includes literals nested in a DSL. + return addCaptures(of: n, optionalNesting: nesting.disablingNesting) case .matcher: break @@ -617,6 +611,16 @@ extension DSLTree.Node { } } + static func build(_ dsl: DSLTree) -> CaptureList { + var builder = Self() + builder.captures.append( + .init(type: dsl.root.wholeMatchType, optionalDepth: 0, .fake)) + builder.addCaptures(of: dsl.root, optionalNesting: .init(canNest: true)) + return builder.captures + } +} + +extension DSLTree.Node { /// Returns true if the node is output-forwarding, i.e. not defining its own /// output but forwarding its only child's output. var isOutputForwarding: Bool { @@ -651,13 +655,13 @@ extension DSLTree.Node { } } +// MARK: AST wrapper types +// +// These wrapper types are required because even @_spi-marked public APIs can't +// include symbols from implementation-only dependencies. + extension DSLTree { - var captureList: CaptureList { - var list = CaptureList() - list.append(.init(type: root.wholeMatchType, optionalDepth: 0, .fake)) - root._addCaptures(to: &list, optionalNesting: 0) - return list - } + var captureList: CaptureList { .Builder.build(self) } /// Presents a wrapped version of `DSLTree.Node` that can provide an internal /// `_TreeNode` conformance. diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 60e92f0ba..f56e42eb9 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -1258,6 +1258,88 @@ class RegexDSLTests: XCTestCase { } func testOptionalNesting() throws { + try _testDSLCaptures( + ("a", ("a", nil)), + ("", ("", nil)), + ("b", ("b", "b")), + ("bb", ("bb", "b")), + matchType: (Substring, Substring?).self, ==) + { + try! Regex("(?:a|(b)*)?", as: (Substring, Substring?).self) + } + + try _testDSLCaptures( + ("a", ("a", nil)), + ("", ("", nil)), + ("b", ("b", "b")), + ("bb", ("bb", "b")), + matchType: (Substring, Substring??).self, ==) + { + Optionally { + try! Regex("a|(b)*", as: (Substring, Substring?).self) + } + } + + try _testDSLCaptures( + ("a", ("a", nil)), + ("", ("", nil)), + ("b", ("b", "b")), + ("bb", ("bb", "b")), + matchType: (Substring, Substring???).self, ==) + { + Optionally { + ChoiceOf { + try! Regex("a", as: Substring.self) + try! Regex("(b)*", as: (Substring, Substring?).self) + } + } + } + + try _testDSLCaptures( + ("a", ("a", nil)), + ("", ("", nil)), + ("b", ("b", "b")), + ("bb", ("bb", "b")), + matchType: (Substring, Substring??).self, ==) + { + ChoiceOf { + try! Regex("a", as: Substring.self) + try! Regex("(b)*", as: (Substring, Substring?).self) + } + } + + try _testDSLCaptures( + ("a", ("a", nil)), + ("", ("", nil)), + ("b", ("b", "b")), + ("bb", ("bb", "b")), + matchType: (Substring, Substring??).self, ==) + { + ChoiceOf { + try! Regex("a", as: Substring.self) + ZeroOrMore { + try! Regex("(b)", as: (Substring, Substring).self) + } + } + } + + try _testDSLCaptures( + ("a", ("a", nil)), + ("", ("", nil)), + ("b", ("b", "b")), + ("bb", ("bb", "b")), + matchType: (Substring, Substring??).self, ==) + { + ChoiceOf { + try! Regex("a", as: Substring.self) + ZeroOrMore { + Capture { + try! Regex("b", as: Substring.self) + } + } + } + } + let r = Regex { Optionally { Optionally { diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index 952b65ec6..4cee7f009 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -22,21 +22,6 @@ extension CaptureList.Capture { static var opt: Self { return Self(optionalDepth: 1, .fake) } - static var opt_opt: Self { - return Self(optionalDepth: 2, .fake) - } - static var opt_opt_opt: Self { - return Self(optionalDepth: 3, .fake) - } - static var opt_opt_opt_opt: Self { - return Self(optionalDepth: 4, .fake) - } - static var opt_opt_opt_opt_opt: Self { - return Self(optionalDepth: 5, .fake) - } - static var opt_opt_opt_opt_opt_opt: Self { - return Self(optionalDepth: 6, .fake) - } static func named(_ name: String, opt: Int = 0) -> Self { return Self(name: name, optionalDepth: opt, .fake) } @@ -201,7 +186,7 @@ func captureTest( guard let result = try! executor.dynamicMatch( input, in: inputRange, .wholeString ) else { - XCTFail("No match") + XCTFail("No match", file: file, line: line) return } @@ -210,12 +195,12 @@ func captureTest( caps._elements.removeFirst() guard caps.count == output.count else { XCTFail(""" - Mismatch capture count: + Mismatched capture count: Expected: \(output) Seen: \(caps.formatStringCaptures(input: input)) - """) + """, file: file, line: line) continue } @@ -223,12 +208,12 @@ func captureTest( $0.isEqual(to: $1, in: input) }) else { XCTFail(""" - Mismatch capture count: + Mismatched captures: Expected: \(output) Seen: \(caps.formatStringCaptures(input: input)) - """) + """, file: file, line: line) continue } } @@ -298,37 +283,38 @@ extension RegexTests { captureTest( "((a)|(b))?", - [.opt, .opt_opt, .opt_opt], - ("a", [.some("a"), .some(.some("a")), .some(.none)]), - ("b", [.some("b"), .some(.none), .some(.some("b"))])) + [.opt, .opt, .opt], + ("a", [.some("a"), .some("a"), .none]), + ("b", [.some("b"), .none, .some("b")])) - // FIXME captureTest( "((a)|(b))*", - [.opt, .opt_opt, .opt_opt], - ("a", [.some("a"), .some(.some("a")), .some(.none)]), - skipEngine: true) + [.opt, .opt, .opt], + ("a", [.some("a"), .some("a"), .none])) - // FIXME captureTest( "((a)|(b))+", [.cap, .opt, .opt], - // TODO: test cases - skipEngine: true) + ("a", ["a", .some("a"), .none]), + ("aaa", ["a", .some("a"), .none]), + ("b", ["b", .none, .some("b")]), + ("bbaba", ["a", .some("a"), .some("b")])) - // FIXME captureTest( "(((a)|(b))*)", - [.cap, .opt, .opt_opt, .opt_opt], - // TODO: test cases - skipEngine: true) + [.cap, .opt, .opt, .opt], + ("a", ["a", .some("a"), .some("a"), .none]), + ("aaa", ["aaa", .some("a"), .some("a"), .none]), + ("b", ["b", .some("b"), .none, .some("b")]), + ("bbaba", ["bbaba", .some("a"), .some("a"), .some("b")]), + ("", ["", .none, .none, .none])) - // FIXME captureTest( "(((a)|(b))?)", - [.cap, .opt, .opt_opt, .opt_opt], - // TODO: test cases - skipEngine: true) + [.cap, .opt, .opt, .opt], + ("a", ["a", .some("a"), .some("a"), .none]), + ("b", ["b", .some("b"), .none, .some("b")]), + ("", ["", .none, .none, .none])) captureTest( "(a)", @@ -345,11 +331,13 @@ extension RegexTests { [.cap, .cap, .cap], ("a", ["a", "a", "a"])) - // FIXME captureTest( "((((a)*)?)*)?", - [.opt, .opt_opt, .opt_opt_opt, .opt_opt_opt_opt], - // TODO: test cases + [.opt, .opt, .opt, .opt], + ("a", [.some("a"), .some("a"), .some("a"), .some("a")]), + ("aaa", [.some("aaa"), .some("aaa"), .some("aaa"), .some("a")]), + ("", [.some(""), .none, .none, .none]), + // FIXME: This spins the matching engine forever. skipEngine: true) captureTest( @@ -360,33 +348,27 @@ extension RegexTests { ("b", [.some("b")]), ("bbb", [.some("bbb")])) - // FIXME captureTest( "a|(b)*", - [.opt_opt], + [.opt], ("a", [.none]), - ("", [.some("")]), + ("", [.none]), ("b", [.some("b")]), - ("bbb", [.some("b")]), - skipEngine: true) + ("bbb", [.some("b")])) - // FIXME captureTest( "a|(b)+", [.opt], ("a", [.none]), ("b", [.some("b")]), - ("bbb", [.some("b")]), - skipEngine: true) + ("bbb", [.some("b")])) - // FIXME captureTest( "a|(b)?", - [.opt_opt], + [.opt], ("a", [.none]), ("", [.none]), - ("b", [.some(.some("b"))]), - skipEngine: true) + ("b", [.some("b")])) captureTest( "a|(b|c)", @@ -402,25 +384,21 @@ extension RegexTests { ("b", [.some("b")]), ("c", [.some("c")])) - // FIXME captureTest( "a|(b|c)*", - [.opt_opt], + [.opt], ("a", [.none]), - ("", [.some("")]), + ("", [.none]), ("b", [.some("b")]), - ("bbb", [.some("b")]), - skipEngine: true) + ("bbb", [.some("b")])) - // FIXME captureTest( "a|(b|c)?", - [.opt_opt], + [.opt], ("a", [.none]), ("", [.none]), - ("b", [.some(.some("b"))]), - ("c", [.some(.some("c"))]), - skipEngine: true) + ("b", [.some("b")]), + ("c", [.some("c")])) captureTest( "a(b(c))", @@ -460,9 +438,31 @@ extension RegexTests { ("a", [.none, .none]), ("abc", [.some("bc"), .some("c")])) - // TODO: "((a|b)*|c)*" - // TODO: "((a|b)|c)*" + captureTest( + "((a|b)*|c)*", + [.opt, .opt], + ("a", [.some("a"), .some("a")]), + ("b", [.some("b"), .some("b")]), + ("c", [.some("c"), .none]), + ("ababc", [.some("c"), .some("b")]), + ("acab", [.some("b"), .some("b")]), + ("cbba", [.some("a"), .some("a")]), + ("cbbaaa", [.some("aaa"), .some("a")]), + ("", [.none, .none]), + // FIXME: This spins the matching engine forever. + skipEngine: true) + captureTest( + "((a|b)|c)*", + [.opt, .opt], + ("a", [.some("a"), .some("a")]), + ("b", [.some("b"), .some("b")]), + ("c", [.some("c"), .none]), + ("ababc", [.some("c"), .some("b")]), + ("acab", [.some("b"), .some("b")]), + ("cbba", [.some("a"), .some("a")]), + ("cbbaaa", [.some("a"), .some("a")]), + ("", [.none, .none])) } func testTypeVerification() throws { @@ -487,9 +487,9 @@ extension RegexTests { XCTAssertNil(Regex<(Substring, Substring?)>(opaque4)) let opaque5 = try Regex("((a)?bc)?") - _ = try XCTUnwrap(Regex<(Substring, Substring?, Substring??)>(opaque5)) + _ = try XCTUnwrap(Regex<(Substring, Substring?, Substring?)>(opaque5)) + XCTAssertNil(Regex<(Substring, Substring?, Substring??)>(opaque5)) XCTAssertNil(Regex<(Substring, somethingHere: Substring?, here: Substring??)>(opaque5)) - XCTAssertNil(Regex<(Substring, Substring?, Substring?)>(opaque5)) } } From 1221c09ec47f014306778989e2eec3aad3bca3bb Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 5 Jul 2022 22:16:05 +0100 Subject: [PATCH 5/6] Remove `nestOptionals` param from `_captureStructure` This is no longer needed as the capture list has the right nesting information. --- .../Regex/Parse/CaptureStructure.swift | 22 +++++++------------ Tests/RegexTests/ParseTests.swift | 2 +- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift b/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift index 3212699fb..bde174c14 100644 --- a/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift +++ b/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift @@ -225,33 +225,27 @@ extension CaptureStructure: CustomStringConvertible { extension AST { /// The capture structure of this AST for compiler communication. var captureStructure: CaptureStructure { - captureList._captureStructure(nestOptionals: true) + captureList._captureStructure } } // MARK: Convert CaptureList into CaptureStructure extension CaptureList { - func _captureStructure(nestOptionals: Bool) -> CaptureStructure { + var _captureStructure: CaptureStructure { if captures.isEmpty { return .empty } if captures.count == 1 { - return captures.first!._captureStructure(nestOptionals: nestOptionals) + return captures.first!._captureStructure } - return .tuple(captures.map { - $0._captureStructure(nestOptionals: nestOptionals) - }) + return .tuple(captures.map(\._captureStructure)) } } extension CaptureList.Capture { - func _captureStructure(nestOptionals: Bool) -> CaptureStructure { - if optionalDepth == 0 { - return .atom(name: name, type: type == Substring.self ? nil : .init(type)) - } - var copy = self - copy.optionalDepth = 0 - var base = copy._captureStructure(nestOptionals: false) - for _ in 0..<(nestOptionals ? optionalDepth : 1) { + var _captureStructure: CaptureStructure { + var base = CaptureStructure.atom( + name: name, type: type == Substring.self ? nil : .init(type)) + for _ in 0 ..< optionalDepth { base = .optional(base) } return base diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 9803bb2e1..3c43f27af 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -90,7 +90,7 @@ func parseTest( } // Test capture structure round trip serialization. - let capStruct = captures._captureStructure(nestOptionals: true) + let capStruct = captures._captureStructure let serializedCapturesSize = CaptureStructure.serializationBufferSize( forInputUTF8CodeUnitCount: input.utf8.count) let serializedCaptures = UnsafeMutableRawBufferPointer.allocate( From 581c317a5032e88fa100ebae37643d35bf1ca9c8 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Wed, 6 Jul 2022 13:40:14 +0100 Subject: [PATCH 6/6] Fix `_testDSLCaptures` Previously we would ignore the case where the match fails, but the test expects the match to succeed. --- Tests/RegexBuilderTests/RegexDSLTests.swift | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index f56e42eb9..b67c6c242 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -270,10 +270,11 @@ class RegexDSLTests: XCTestCase { } .ignoresCase(false) } - + + // FIXME: Re-enable this test try _testDSLCaptures( ("can't stop won't stop", ("can't stop won't stop", "can't", "won't")), - matchType: (Substring, Substring, Substring).self, ==) { + matchType: (Substring, Substring, Substring).self, ==, xfail: true) { Capture { OneOrMore(.word) Anchor.wordBoundary @@ -289,10 +290,11 @@ class RegexDSLTests: XCTestCase { OneOrMore(.any, .reluctant) "stop" } - + + // FIXME: Re-enable this test try _testDSLCaptures( ("can't stop won't stop", ("can't stop won't stop", "can", "won")), - matchType: (Substring, Substring, Substring).self, ==) { + matchType: (Substring, Substring, Substring).self, ==, xfail: true) { Capture { OneOrMore(.word) Anchor.wordBoundary