From 871a6b5a6749b6a30603faed598a49f59ca96b27 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Fri, 15 Apr 2022 14:33:25 -0700 Subject: [PATCH 1/5] Add missing `@_implementationOnly` imports --- Sources/RegexBuilder/Anchor.swift | 2 +- Sources/RegexBuilder/CharacterClass.swift | 2 +- Sources/RegexBuilder/DSL.swift | 2 +- Sources/RegexBuilder/Variadics.swift | 1 - Sources/VariadicsGenerator/VariadicsGenerator.swift | 1 - 5 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Sources/RegexBuilder/Anchor.swift b/Sources/RegexBuilder/Anchor.swift index 55b554aea..190206ee6 100644 --- a/Sources/RegexBuilder/Anchor.swift +++ b/Sources/RegexBuilder/Anchor.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -import _RegexParser +@_implementationOnly import _RegexParser @_spi(RegexBuilder) import _StringProcessing @available(SwiftStdlib 5.7, *) diff --git a/Sources/RegexBuilder/CharacterClass.swift b/Sources/RegexBuilder/CharacterClass.swift index d163c336b..70c5c98f0 100644 --- a/Sources/RegexBuilder/CharacterClass.swift +++ b/Sources/RegexBuilder/CharacterClass.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -import _RegexParser +@_implementationOnly import _RegexParser @_spi(RegexBuilder) import _StringProcessing @available(SwiftStdlib 5.7, *) diff --git a/Sources/RegexBuilder/DSL.swift b/Sources/RegexBuilder/DSL.swift index 3c5f5ab5f..693e9fcd7 100644 --- a/Sources/RegexBuilder/DSL.swift +++ b/Sources/RegexBuilder/DSL.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -import _RegexParser +@_implementationOnly import _RegexParser @_spi(RegexBuilder) import _StringProcessing @available(SwiftStdlib 5.7, *) diff --git a/Sources/RegexBuilder/Variadics.swift b/Sources/RegexBuilder/Variadics.swift index 2ac5b3231..1a8dead83 100644 --- a/Sources/RegexBuilder/Variadics.swift +++ b/Sources/RegexBuilder/Variadics.swift @@ -11,7 +11,6 @@ // BEGIN AUTO-GENERATED CONTENT -import _RegexParser @_spi(RegexBuilder) import _StringProcessing @available(SwiftStdlib 5.7, *) diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index 2df2f7c96..4c4a322aa 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -121,7 +121,6 @@ struct VariadicsGenerator: ParsableCommand { // BEGIN AUTO-GENERATED CONTENT - import _RegexParser @_spi(RegexBuilder) import _StringProcessing From 5f0df5a8ea9a43dc67c81d077761b926936cef9a Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 18 Apr 2022 13:46:21 -0500 Subject: [PATCH 2/5] Progress... --- Sources/RegexBuilder/DSL.swift | 10 +- Sources/RegexBuilder/Variadics.swift | 44 +++--- .../VariadicsGenerator.swift | 4 +- Sources/_StringProcessing/ByteCodeGen.swift | 6 +- .../_StringProcessing/PrintAsPattern.swift | 6 +- .../Regex/ASTConversion.swift | 12 +- .../Regex/DSLConsumers.swift | 2 +- Sources/_StringProcessing/Regex/DSLTree.swift | 127 +++++++++++++++--- Sources/_StringProcessing/Regex/Options.swift | 2 +- 9 files changed, 148 insertions(+), 65 deletions(-) diff --git a/Sources/RegexBuilder/DSL.swift b/Sources/RegexBuilder/DSL.swift index 693e9fcd7..f88971f1e 100644 --- a/Sources/RegexBuilder/DSL.swift +++ b/Sources/RegexBuilder/DSL.swift @@ -105,7 +105,7 @@ public struct QuantificationBehavior { var kind: Kind - internal var astKind: AST.Quantification.Kind { + internal var astKind: DSLTree._AST.QuantificationKind { switch kind { case .eagerly: return .eager case .reluctantly: return .reluctant @@ -134,13 +134,13 @@ extension DSLTree.Node { return .quantification(.oneOrMore, behavior.astKind, node) case _ where range.count == 1: // ..<1 or ...0 or any range with count == 1 // Note: `behavior` is ignored in this case - return .quantification(.exactly(.init(faking: range.lowerBound)), .eager, node) + return .quantification(.exactly(range.lowerBound), .eager, node) case (0, _): // 0.. 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -713,7 +713,7 @@ extension Repeat { ) where RegexOutput == Substring { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -822,7 +822,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?), Component.RegexOutput == (W, C0) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -832,7 +832,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?), Component.RegexOutput == (W, C0) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -939,7 +939,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?), Component.RegexOutput == (W, C0, C1) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -949,7 +949,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?), Component.RegexOutput == (W, C0, C1) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1056,7 +1056,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?), Component.RegexOutput == (W, C0, C1, C2) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1066,7 +1066,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?), Component.RegexOutput == (W, C0, C1, C2) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1173,7 +1173,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?), Component.RegexOutput == (W, C0, C1, C2, C3) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1183,7 +1183,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?), Component.RegexOutput == (W, C0, C1, C2, C3) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1290,7 +1290,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C0, C1, C2, C3, C4) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1300,7 +1300,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C0, C1, C2, C3, C4) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1407,7 +1407,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1417,7 +1417,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1524,7 +1524,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1534,7 +1534,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1641,7 +1641,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1651,7 +1651,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1758,7 +1758,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7, C8) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1768,7 +1768,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7, C8) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1875,7 +1875,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1885,7 +1885,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } @available(SwiftStdlib 5.7, *) diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index 4c4a322aa..3358647fc 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -487,7 +487,7 @@ struct VariadicsGenerator: ParsableCommand { ) \(params.whereClauseForInit) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component.regex.root)) } \(defaultAvailableAttr) @@ -498,7 +498,7 @@ struct VariadicsGenerator: ParsableCommand { ) \(params.whereClauseForInit) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root)) + self.init(node: .quantification(.exactly(count), .eager, component().regex.root)) } \(defaultAvailableAttr) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index c8cf0805c..064ce6f7e 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -592,13 +592,13 @@ extension Compiler.ByteCodeGen { } case let .nonCapturingGroup(kind, child): - try emitNoncapturingGroup(kind, child) + try emitNoncapturingGroup(kind.ast, child) case .conditional: throw Unsupported("Conditionals") case let .quantification(amt, kind, child): - try emitQuantification(amt, kind, child) + try emitQuantification(amt.ast, kind.ast, child) case let .customCharacterClass(ccc): if ccc.containsAny { @@ -634,7 +634,7 @@ extension Compiler.ByteCodeGen { } case let .regexLiteral(l): - try emitNode(l.dslTreeNode) + try emitNode(l.ast.dslTreeNode) case let .convertedRegexLiteral(n, _): try emitNode(n) diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index edf4fad40..837c50240 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -90,7 +90,7 @@ extension PrettyPrinter { } case let .nonCapturingGroup(kind, child): - let kind = kind._patternBase + let kind = kind.ast._patternBase printBlock("Group(\(kind))") { printer in printer.printAsPattern(convertedFromAST: child) } @@ -108,8 +108,8 @@ extension PrettyPrinter { print("/* TODO: conditional */") case let .quantification(amount, kind, child): - let amount = amount._patternBase - let kind = kind._patternBase + let amount = amount.ast._patternBase + let kind = kind.ast._patternBase printBlock("\(amount)(\(kind))") { printer in printer.printAsPattern(convertedFromAST: child) } diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index d32770eae..00d5eef23 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -40,7 +40,7 @@ extension AST.Node { // TODO: Should we do this for the // single-concatenation child too, or should? // we wrap _that_? - return .convertedRegexLiteral(node, self) + return .convertedRegexLiteral(node, .init(ast: self)) } // Convert the top-level node without wrapping @@ -111,19 +111,19 @@ extension AST.Node { case .balancedCapture: throw Unsupported("TODO: balanced captures") default: - return .nonCapturingGroup(v.kind.value, child) + return .nonCapturingGroup(.init(ast: v.kind.value), child) } case let .conditional(v): let trueBranch = v.trueBranch.dslTreeNode let falseBranch = v.falseBranch.dslTreeNode return .conditional( - v.condition.kind, trueBranch, falseBranch) + .init(ast: v.condition.kind), trueBranch, falseBranch) case let .quantification(v): let child = v.child.dslTreeNode return .quantification( - v.amount.value, v.kind.value, child) + .init(ast: v.amount.value), .init(ast: v.kind.value), child) case let .quote(v): return .quotedLiteral(v.literal) @@ -140,9 +140,9 @@ extension AST.Node { case .empty(_): return .empty - case let .absentFunction(a): + case let .absentFunction(abs): // TODO: What should this map to? - return .absentFunction(a) + return .absentFunction(.init(ast: abs)) } } diff --git a/Sources/_StringProcessing/Regex/DSLConsumers.swift b/Sources/_StringProcessing/Regex/DSLConsumers.swift index e1a69d74b..0ad824090 100644 --- a/Sources/_StringProcessing/Regex/DSLConsumers.swift +++ b/Sources/_StringProcessing/Regex/DSLConsumers.swift @@ -21,7 +21,7 @@ public protocol CustomRegexComponent: RegexComponent { @available(SwiftStdlib 5.7, *) extension CustomRegexComponent { public var regex: Regex { - Regex(node: .matcher(.init(RegexOutput.self), { input, index, bounds in + Regex(node: .matcher(RegexOutput.self, { input, index, bounds in match(input, startingAt: index, in: bounds) })) } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 6bf4ec47c..968ccfc3e 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -24,7 +24,7 @@ public struct DSLTree { extension DSLTree { @_spi(RegexBuilder) - public indirect enum Node: _TreeNode { + public indirect enum Node: _DSLTreeNode { /// Try to match each node in order /// /// ... | ... | ... @@ -42,7 +42,7 @@ extension DSLTree { name: String? = nil, reference: ReferenceID? = nil, Node) /// Match a (non-capturing) subpattern / group - case nonCapturingGroup(AST.Group.Kind, Node) + case nonCapturingGroup(_AST.GroupKind, Node) // TODO: Consider splitting off grouped conditions, or have // our own kind @@ -52,11 +52,11 @@ extension DSLTree { /// (?(cond) true-branch | false-branch) /// case conditional( - AST.Conditional.Condition.Kind, Node, Node) + _AST.ConditionKind, Node, Node) case quantification( - AST.Quantification.Amount, - AST.Quantification.Kind, + _AST.QuantificationAmount, + _AST.QuantificationKind, Node) case customCharacterClass(CustomCharacterClass) @@ -74,19 +74,19 @@ extension DSLTree { case quotedLiteral(String) /// An embedded literal - case regexLiteral(AST.Node) + case regexLiteral(_AST.ASTNode) // TODO: What should we do here? /// /// TODO: Consider splitting off expression functions, or have our own kind - case absentFunction(AST.AbsentFunction) + case absentFunction(_AST.AbsentFunction) // MARK: - Tree conversions /// The target of AST conversion. /// /// Keeps original AST around for rich syntactic and source information - case convertedRegexLiteral(Node, AST.Node) + case convertedRegexLiteral(Node, _AST.ASTNode) // MARK: - Extensibility points @@ -95,7 +95,7 @@ extension DSLTree { case consumer(_ConsumerInterface) - case matcher(AnyType, _MatcherInterface) + case matcher(Any.Type, _MatcherInterface) // TODO: Would this just boil down to a consumer? case characterPredicate(_CharacterPredicateInterface) @@ -216,8 +216,8 @@ extension DSLTree.Node { .customCharacterClass, .atom: return [] - case let .absentFunction(a): - return a.children.map(\.dslTreeNode) + case let .absentFunction(abs): + return abs.ast.children.map(\.dslTreeNode) } } } @@ -225,8 +225,8 @@ extension DSLTree.Node { extension DSLTree.Node { var astNode: AST.Node? { switch self { - case let .regexLiteral(ast): return ast - case let .convertedRegexLiteral(_, ast): return ast + case let .regexLiteral(literal): return literal.ast + case let .convertedRegexLiteral(_, literal): return literal.ast default: return nil } } @@ -270,9 +270,9 @@ extension DSLTree.Node { case .capture: return true case let .regexLiteral(re): - return re.hasCapture + return re.ast.hasCapture case let .convertedRegexLiteral(n, re): - assert(n.hasCapture == re.hasCapture) + assert(n.hasCapture == re.ast.hasCapture) return n.hasCapture default: @@ -308,25 +308,25 @@ extension DSLTree.Node { return constructor.capturing(name: name, child) case let .nonCapturingGroup(kind, child): - assert(!kind.isCapturing) - return constructor.grouping(child, as: kind) + assert(!kind.ast.isCapturing) + return constructor.grouping(child, as: kind.ast) case let .conditional(cond, trueBranch, falseBranch): return constructor.condition( - cond, + cond.ast, trueBranch: trueBranch, falseBranch: falseBranch) case let .quantification(amount, _, child): return constructor.quantifying( - child, amount: amount) + child, amount: amount.ast) case let .regexLiteral(re): // TODO: Force a re-nesting? - return re._captureStructure(&constructor) + return re.ast._captureStructure(&constructor) case let .absentFunction(abs): - return constructor.absent(abs.kind) + return constructor.absent(abs.ast.kind) case let .convertedRegexLiteral(n, _): // TODO: Switch nesting strategy? @@ -348,7 +348,7 @@ extension DSLTree.Node { var valueCaptureType: AnyType? { switch self { case let .matcher(t, _): - return t + return AnyType(t) case let .transform(t, _): return AnyType(t.resultType) default: return nil @@ -445,3 +445,86 @@ public struct CaptureTransform: Hashable, CustomStringConvertible { "" } } + +// MARK: AST wrapper types +// +// These wrapper types are required because even @_spi-marked public APIs can't +// include symbols from implementation-only dependencies. +internal protocol _DSLTreeNode: _TreeNode {} + +extension DSLTree { + @_spi(RegexBuilder) + public enum _AST { + @_spi(RegexBuilder) + public struct GroupKind { + internal var ast: AST.Group.Kind + + @_spi(RegexBuilder) public static var atomicNonCapturing: Self { + .init(ast: .atomicNonCapturing) + } + @_spi(RegexBuilder) public static var lookahead: Self { + .init(ast: .lookahead) + } + @_spi(RegexBuilder) public static var negativeLookahead: Self { + .init(ast: .negativeLookahead) + } + } + + @_spi(RegexBuilder) + public struct ConditionKind { + internal var ast: AST.Conditional.Condition.Kind + } + + @_spi(RegexBuilder) + public struct QuantificationKind { + internal var ast: AST.Quantification.Kind + + @_spi(RegexBuilder) public static var eager: Self { + .init(ast: .eager) + } + @_spi(RegexBuilder) public static var reluctant: Self { + .init(ast: .reluctant) + } + @_spi(RegexBuilder) public static var possessive: Self { + .init(ast: .possessive) + } + } + + @_spi(RegexBuilder) + public struct QuantificationAmount { + internal var ast: AST.Quantification.Amount + + @_spi(RegexBuilder) public static var zeroOrMore: Self { + .init(ast: .zeroOrMore) + } + @_spi(RegexBuilder) public static var oneOrMore: Self { + .init(ast: .oneOrMore) + } + @_spi(RegexBuilder) public static var zeroOrOne: Self { + .init(ast: .zeroOrOne) + } + @_spi(RegexBuilder) public static func exactly(_ n: Int) -> Self { + .init(ast: .exactly(.init(faking: n))) + } + @_spi(RegexBuilder) public static func nOrMore(_ n: Int) -> Self { + .init(ast: .nOrMore(.init(faking: n))) + } + @_spi(RegexBuilder) public static func upToN(_ n: Int) -> Self { + .init(ast: .upToN(.init(faking: n))) + } + @_spi(RegexBuilder) public static func range(_ lower: Int, _ upper: Int) -> Self { + .init(ast: .range(.init(faking: lower), .init(faking: upper))) + } + } + + @_spi(RegexBuilder) + public struct ASTNode { + internal var ast: AST.Node + } + + @_spi(RegexBuilder) + public struct AbsentFunction { + internal var ast: AST.AbsentFunction + } + } +} diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift index d474caae3..16079dc99 100644 --- a/Sources/_StringProcessing/Regex/Options.swift +++ b/Sources/_StringProcessing/Regex/Options.swift @@ -173,6 +173,6 @@ extension RegexComponent { ? AST.MatchingOptionSequence(adding: [.init(option, location: .fake)]) : AST.MatchingOptionSequence(removing: [.init(option, location: .fake)]) return Regex(node: .nonCapturingGroup( - .changeMatchingOptions(sequence), regex.root)) + .init(ast: .changeMatchingOptions(sequence)), regex.root)) } } From 0f6b9eadc3e6107d2bb5716f85d519e438fa9754 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 18 Apr 2022 23:37:38 -0500 Subject: [PATCH 3/5] All wrapped except for CaptureStructure --- Sources/RegexBuilder/Anchor.swift | 35 ++--- Sources/RegexBuilder/CharacterClass.swift | 59 +------- Sources/_StringProcessing/ByteCodeGen.swift | 16 +-- .../_StringProcessing/ConsumerInterface.swift | 2 +- .../_StringProcessing/PrintAsPattern.swift | 11 +- .../Regex/ASTConversion.swift | 8 +- .../Regex/DSLConsumers.swift | 9 +- Sources/_StringProcessing/Regex/DSLTree.swift | 135 +++++++++++++++--- .../_CharacterClassModel.swift | 14 +- Tests/RegexBuilderTests/CustomTests.swift | 15 +- 10 files changed, 169 insertions(+), 135 deletions(-) diff --git a/Sources/RegexBuilder/Anchor.swift b/Sources/RegexBuilder/Anchor.swift index 190206ee6..e8cd4ac54 100644 --- a/Sources/RegexBuilder/Anchor.swift +++ b/Sources/RegexBuilder/Anchor.swift @@ -31,34 +31,21 @@ public struct Anchor { @available(SwiftStdlib 5.7, *) extension Anchor: RegexComponent { - var astAssertion: AST.Atom.AssertionKind { - if !isInverted { - switch kind { - case .startOfSubject: return .startOfSubject - case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline - case .endOfSubject: return .endOfSubject - case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject - case .textSegmentBoundary: return .textSegment - case .startOfLine: return .startOfLine - case .endOfLine: return .endOfLine - case .wordBoundary: return .wordBoundary - } - } else { - switch kind { - case .startOfSubject: fatalError("Not yet supported") - case .endOfSubjectBeforeNewline: fatalError("Not yet supported") - case .endOfSubject: fatalError("Not yet supported") - case .firstMatchingPositionInSubject: fatalError("Not yet supported") - case .textSegmentBoundary: return .notTextSegment - case .startOfLine: fatalError("Not yet supported") - case .endOfLine: fatalError("Not yet supported") - case .wordBoundary: return .notWordBoundary - } + var baseAssertion: DSLTree._AST.AssertionKind { + switch kind { + case .startOfSubject: return .startOfSubject(isInverted) + case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted) + case .endOfSubject: return .endOfSubject(isInverted) + case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted) + case .textSegmentBoundary: return .textSegmentBoundary(isInverted) + case .startOfLine: return .startOfLine(isInverted) + case .endOfLine: return .endOfLine(isInverted) + case .wordBoundary: return .wordBoundary(isInverted) } } public var regex: Regex { - Regex(node: .atom(.assertion(astAssertion))) + Regex(node: .atom(.assertion(baseAssertion))) } } diff --git a/Sources/RegexBuilder/CharacterClass.swift b/Sources/RegexBuilder/CharacterClass.swift index 70c5c98f0..0087d734a 100644 --- a/Sources/RegexBuilder/CharacterClass.swift +++ b/Sources/RegexBuilder/CharacterClass.swift @@ -21,19 +21,10 @@ public struct CharacterClass { } init(unconverted model: _CharacterClassModel) { - // FIXME: Implement in DSLTree instead of wrapping an AST atom - switch model.makeAST() { - case .atom(let atom): - self.ccc = .init(members: [.atom(.unconverted(atom))]) - default: - fatalError("Unsupported _CharacterClassModel") + guard let ccc = model.makeDSLTreeCharacterClass() else { + fatalError("Unsupported character class") } - } - - init(property: AST.Atom.CharacterProperty) { - // FIXME: Implement in DSLTree instead of wrapping an AST atom - let astAtom = AST.Atom(.property(property), .fake) - self.ccc = .init(members: [.atom(.unconverted(astAtom))]) + self.ccc = ccc } } @@ -119,11 +110,7 @@ extension RegexComponent where Self == CharacterClass { @available(SwiftStdlib 5.7, *) extension CharacterClass { public static func generalCategory(_ category: Unicode.GeneralCategory) -> CharacterClass { - guard let extendedCategory = category.extendedGeneralCategory else { - fatalError("Unexpected general category") - } - return CharacterClass(property: - .init(.generalCategory(extendedCategory), isInverted: false, isPOSIX: false)) + return CharacterClass(.generalCategory(category)) } } @@ -144,44 +131,6 @@ public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass { return CharacterClass(ccc) } -extension Unicode.GeneralCategory { - var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? { - switch self { - case .uppercaseLetter: return .uppercaseLetter - case .lowercaseLetter: return .lowercaseLetter - case .titlecaseLetter: return .titlecaseLetter - case .modifierLetter: return .modifierLetter - case .otherLetter: return .otherLetter - case .nonspacingMark: return .nonspacingMark - case .spacingMark: return .spacingMark - case .enclosingMark: return .enclosingMark - case .decimalNumber: return .decimalNumber - case .letterNumber: return .letterNumber - case .otherNumber: return .otherNumber - case .connectorPunctuation: return .connectorPunctuation - case .dashPunctuation: return .dashPunctuation - case .openPunctuation: return .openPunctuation - case .closePunctuation: return .closePunctuation - case .initialPunctuation: return .initialPunctuation - case .finalPunctuation: return .finalPunctuation - case .otherPunctuation: return .otherPunctuation - case .mathSymbol: return .mathSymbol - case .currencySymbol: return .currencySymbol - case .modifierSymbol: return .modifierSymbol - case .otherSymbol: return .otherSymbol - case .spaceSeparator: return .spaceSeparator - case .lineSeparator: return .lineSeparator - case .paragraphSeparator: return .paragraphSeparator - case .control: return .control - case .format: return .format - case .surrogate: return .surrogate - case .privateUse: return .privateUse - case .unassigned: return .unassigned - @unknown default: return nil - } - } -} - // MARK: - Set algebra methods @available(SwiftStdlib 5.7, *) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 058480feb..02cc1ccb6 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -26,22 +26,22 @@ extension Compiler.ByteCodeGen { try emitScalar(s) case let .assertion(kind): - try emitAssertion(kind) + try emitAssertion(kind.ast) case let .backreference(ref): - try emitBackreference(ref) + try emitBackreference(ref.ast) case let .symbolicReference(id): builder.buildUnresolvedReference(id: id) case let .changeMatchingOptions(optionSequence): - options.apply(optionSequence) + options.apply(optionSequence.ast) case let .unconverted(astAtom): - if let consumer = try astAtom.generateConsumer(options) { + if let consumer = try astAtom.ast.generateConsumer(options) { builder.buildConsume(by: consumer) } else { - throw Unsupported("\(astAtom._patternBase)") + throw Unsupported("\(astAtom.ast._patternBase)") } } } @@ -370,9 +370,9 @@ extension Compiler.ByteCodeGen { let updatedKind: AST.Quantification.Kind switch kind { case .explicit(let kind): - updatedKind = kind + updatedKind = kind.ast case .syntax(let kind): - updatedKind = kind.applying(options) + updatedKind = kind.ast.applying(options) case .default: updatedKind = options.isReluctantByDefault ? .reluctant @@ -610,7 +610,7 @@ extension Compiler.ByteCodeGen { throw Unsupported("Conditionals") case let .quantification(amt, kind, child): - try emitQuantification(amt.ast, kind.ast, child) + try emitQuantification(amt.ast, kind, child) case let .customCharacterClass(ccc): if ccc.containsAny { diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index ecb7d1356..58a7b551c 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -105,7 +105,7 @@ extension DSLTree.Atom { return nil case let .unconverted(a): - return try a.generateConsumer(opts) + return try a.ast.generateConsumer(opts) } } diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 7ebbf3758..b5ed3701b 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -109,7 +109,7 @@ extension PrettyPrinter { case let .quantification(amount, kind, child): let amount = amount.ast._patternBase - let kind = kind.ast._patternBase + let kind = (kind.ast ?? .eager)._patternBase printBlock("\(amount)(\(kind))") { printer in printer.printAsPattern(convertedFromAST: child) } @@ -129,7 +129,7 @@ extension PrettyPrinter { case let .unconverted(a): // TODO: is this always right? // TODO: Convert built-in character classes - print(a._patternBase) + print(a.ast._patternBase) case .assertion: print("/* TODO: assertions */") @@ -400,11 +400,6 @@ extension AST.Quantification.Kind { extension DSLTree.QuantificationKind { var _patternBase: String { - switch self { - case .explicit(let kind), .syntax(let kind): - return kind._patternBase - case .default: - return ".eager" - } + (ast ?? .eager)._patternBase } } diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index 8385ff289..ef98a7b8f 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -202,20 +202,20 @@ extension AST.CustomCharacterClass { extension AST.Atom { var dslTreeAtom: DSLTree.Atom { if let kind = assertionKind { - return .assertion(kind) + return .assertion(.init(ast: kind)) } switch self.kind { case let .char(c): return .char(c) case let .scalar(s): return .scalar(s) case .any: return .any - case let .backreference(r): return .backreference(r) - case let .changeMatchingOptions(seq): return .changeMatchingOptions(seq) + case let .backreference(r): return .backreference(.init(ast: r)) + case let .changeMatchingOptions(seq): return .changeMatchingOptions(.init(ast: seq)) case .escaped(let c) where c.scalarValue != nil: return .scalar(c.scalarValue!) - default: return .unconverted(self) + default: return .unconverted(.init(ast: self)) } } } diff --git a/Sources/_StringProcessing/Regex/DSLConsumers.swift b/Sources/_StringProcessing/Regex/DSLConsumers.swift index 99301623c..eb8ace8d3 100644 --- a/Sources/_StringProcessing/Regex/DSLConsumers.swift +++ b/Sources/_StringProcessing/Regex/DSLConsumers.swift @@ -21,16 +21,9 @@ public protocol CustomMatchingRegexComponent: RegexComponent { @available(SwiftStdlib 5.7, *) extension CustomMatchingRegexComponent { public var regex: Regex { -<<<<<<< HEAD - Regex(node: .matcher(RegexOutput.self, { input, index, bounds in - match(input, startingAt: index, in: bounds) - })) -======= - - let node: DSLTree.Node = .matcher(.init(RegexOutput.self), { input, index, bounds in + let node: DSLTree.Node = .matcher(RegexOutput.self, { input, index, bounds in try match(input, startingAt: index, in: bounds) }) return Regex(node: node) ->>>>>>> main } } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 3060c0b19..824682856 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -111,6 +111,14 @@ extension DSLTree { case explicit(_AST.QuantificationKind) /// A kind set via syntax, which can be affected by options. case syntax(_AST.QuantificationKind) + + var ast: AST.Quantification.Kind? { + switch self { + case .default: return nil + case .explicit(let kind), .syntax(let kind): + return kind.ast + } + } } @_spi(RegexBuilder) @@ -134,6 +142,12 @@ extension DSLTree { self.isInverted = isInverted } + public static func generalCategory(_ category: Unicode.GeneralCategory) -> Self { + let property = AST.Atom.CharacterProperty(.generalCategory(category.extendedGeneralCategory!), isInverted: false, isPOSIX: false) + let astAtom = AST.Atom(.property(property), .fake) + return .init(members: [.atom(.unconverted(.init(ast: astAtom)))]) + } + public var inverted: CustomCharacterClass { var result = self result.isInverted.toggle() @@ -162,13 +176,51 @@ extension DSLTree { case scalar(Unicode.Scalar) case any - case assertion(AST.Atom.AssertionKind) - case backreference(AST.Reference) + case assertion(_AST.AssertionKind) + case backreference(_AST.Reference) case symbolicReference(ReferenceID) - case changeMatchingOptions(AST.MatchingOptionSequence) + case changeMatchingOptions(_AST.MatchingOptionSequence) + + case unconverted(_AST.Atom) + } +} - case unconverted(AST.Atom) +extension Unicode.GeneralCategory { + var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? { + switch self { + case .uppercaseLetter: return .uppercaseLetter + case .lowercaseLetter: return .lowercaseLetter + case .titlecaseLetter: return .titlecaseLetter + case .modifierLetter: return .modifierLetter + case .otherLetter: return .otherLetter + case .nonspacingMark: return .nonspacingMark + case .spacingMark: return .spacingMark + case .enclosingMark: return .enclosingMark + case .decimalNumber: return .decimalNumber + case .letterNumber: return .letterNumber + case .otherNumber: return .otherNumber + case .connectorPunctuation: return .connectorPunctuation + case .dashPunctuation: return .dashPunctuation + case .openPunctuation: return .openPunctuation + case .closePunctuation: return .closePunctuation + case .initialPunctuation: return .initialPunctuation + case .finalPunctuation: return .finalPunctuation + case .otherPunctuation: return .otherPunctuation + case .mathSymbol: return .mathSymbol + case .currencySymbol: return .currencySymbol + case .modifierSymbol: return .modifierSymbol + case .otherSymbol: return .otherSymbol + case .spaceSeparator: return .spaceSeparator + case .lineSeparator: return .lineSeparator + case .paragraphSeparator: return .paragraphSeparator + case .control: return .control + case .format: return .format + case .surrogate: return .surrogate + case .privateUse: return .privateUse + case .unassigned: return .unassigned + @unknown default: return nil + } } } @@ -469,13 +521,13 @@ extension DSLTree { public struct GroupKind { internal var ast: AST.Group.Kind - @_spi(RegexBuilder) public static var atomicNonCapturing: Self { + public static var atomicNonCapturing: Self { .init(ast: .atomicNonCapturing) } - @_spi(RegexBuilder) public static var lookahead: Self { + public static var lookahead: Self { .init(ast: .lookahead) } - @_spi(RegexBuilder) public static var negativeLookahead: Self { + public static var negativeLookahead: Self { .init(ast: .negativeLookahead) } } @@ -489,13 +541,13 @@ extension DSLTree { public struct QuantificationKind { internal var ast: AST.Quantification.Kind - @_spi(RegexBuilder) public static var eager: Self { + public static var eager: Self { .init(ast: .eager) } - @_spi(RegexBuilder) public static var reluctant: Self { + public static var reluctant: Self { .init(ast: .reluctant) } - @_spi(RegexBuilder) public static var possessive: Self { + public static var possessive: Self { .init(ast: .possessive) } } @@ -504,25 +556,25 @@ extension DSLTree { public struct QuantificationAmount { internal var ast: AST.Quantification.Amount - @_spi(RegexBuilder) public static var zeroOrMore: Self { + public static var zeroOrMore: Self { .init(ast: .zeroOrMore) } - @_spi(RegexBuilder) public static var oneOrMore: Self { + public static var oneOrMore: Self { .init(ast: .oneOrMore) } - @_spi(RegexBuilder) public static var zeroOrOne: Self { + public static var zeroOrOne: Self { .init(ast: .zeroOrOne) } - @_spi(RegexBuilder) public static func exactly(_ n: Int) -> Self { + public static func exactly(_ n: Int) -> Self { .init(ast: .exactly(.init(faking: n))) } - @_spi(RegexBuilder) public static func nOrMore(_ n: Int) -> Self { + public static func nOrMore(_ n: Int) -> Self { .init(ast: .nOrMore(.init(faking: n))) } - @_spi(RegexBuilder) public static func upToN(_ n: Int) -> Self { + public static func upToN(_ n: Int) -> Self { .init(ast: .upToN(.init(faking: n))) } - @_spi(RegexBuilder) public static func range(_ lower: Int, _ upper: Int) -> Self { + public static func range(_ lower: Int, _ upper: Int) -> Self { .init(ast: .range(.init(faking: lower), .init(faking: upper))) } } @@ -536,5 +588,54 @@ extension DSLTree { public struct AbsentFunction { internal var ast: AST.AbsentFunction } + + @_spi(RegexBuilder) + public struct AssertionKind { + internal var ast: AST.Atom.AssertionKind + + public static func startOfSubject(_ inverted: Bool = false) -> Self { + .init(ast: .startOfSubject) + } + public static func endOfSubjectBeforeNewline(_ inverted: Bool = false) -> Self { + .init(ast: .endOfSubjectBeforeNewline) + } + public static func endOfSubject(_ inverted: Bool = false) -> Self { + .init(ast: .endOfSubject) + } + public static func firstMatchingPositionInSubject(_ inverted: Bool = false) -> Self { + .init(ast: .firstMatchingPositionInSubject) + } + public static func textSegmentBoundary(_ inverted: Bool = false) -> Self { + inverted + ? .init(ast: .notTextSegment) + : .init(ast: .textSegment) + } + public static func startOfLine(_ inverted: Bool = false) -> Self { + .init(ast: .startOfLine) + } + public static func endOfLine(_ inverted: Bool = false) -> Self { + .init(ast: .endOfLine) + } + public static func wordBoundary(_ inverted: Bool = false) -> Self { + inverted + ? .init(ast: .notWordBoundary) + : .init(ast: .wordBoundary) + } + } + + @_spi(RegexBuilder) + public struct Reference { + internal var ast: AST.Reference + } + + @_spi(RegexBuilder) + public struct MatchingOptionSequence { + internal var ast: AST.MatchingOptionSequence + } + + @_spi(RegexBuilder) + public struct Atom { + internal var ast: AST.Atom + } } } diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index c9762f00e..a4d2280ec 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -294,7 +294,17 @@ extension _CharacterClassModel: CustomStringConvertible { } extension _CharacterClassModel { - public func makeAST() -> AST.Node? { + public func makeDSLTreeCharacterClass() -> DSLTree.CustomCharacterClass? { + // FIXME: Implement in DSLTree instead of wrapping an AST atom + switch makeAST() { + case .atom(let atom): + return .init(members: [.atom(.unconverted(.init(ast: atom)))]) + default: + return nil + } + } + + internal func makeAST() -> AST.Node? { let inv = isInverted func esc(_ b: AST.Atom.EscapedBuiltin) -> AST.Node { @@ -375,7 +385,7 @@ extension DSLTree.Atom { var characterClass: _CharacterClassModel? { switch self { case let .unconverted(a): - return a.characterClass + return a.ast.characterClass default: return nil } diff --git a/Tests/RegexBuilderTests/CustomTests.swift b/Tests/RegexBuilderTests/CustomTests.swift index 0a7d6fc59..bf4489a68 100644 --- a/Tests/RegexBuilderTests/CustomTests.swift +++ b/Tests/RegexBuilderTests/CustomTests.swift @@ -136,7 +136,7 @@ func customTest( class CustomRegexComponentTests: XCTestCase { // TODO: Refactor below into more exhaustive, declarative // tests. - func testCustomRegexComponents() { + func testCustomRegexComponents() throws { customTest( Regex { Numbler() @@ -178,14 +178,13 @@ class CustomRegexComponentTests: XCTestCase { } } - guard let res3 = "ab123c".firstMatch(of: regex3) else { - XCTFail() - return - } + let str = "ab123c" + let res3 = try XCTUnwrap(str.firstMatch(of: regex3)) - XCTAssertEqual(res3.range, "ab123c".index(atOffset: 2)..<"ab123c".index(atOffset: 5)) - XCTAssertEqual(res3.output.0, "123") - XCTAssertEqual(res3.output.1, "123") + let expectedSubstring = str.dropFirst(2).prefix(3) + XCTAssertEqual(res3.range, expectedSubstring.startIndex.. Date: Tue, 19 Apr 2022 00:57:37 -0500 Subject: [PATCH 4/5] Wrap DSLTree for internal _TreeNode conformance --- .../_StringProcessing/PrintAsPattern.swift | 2 +- Sources/_StringProcessing/Regex/DSLTree.swift | 151 +++++++++++------- 2 files changed, 94 insertions(+), 59 deletions(-) diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index b5ed3701b..91626eb5c 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -68,7 +68,7 @@ extension PrettyPrinter { private mutating func printAsPattern( convertedFromAST node: DSLTree.Node ) { - if patternBackoff(node) { + if patternBackoff(DSLTree._Tree(node)) { printBackoff(node) return } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 824682856..ce5beeaca 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -24,7 +24,7 @@ public struct DSLTree { extension DSLTree { @_spi(RegexBuilder) - public indirect enum Node: _DSLTreeNode { + public indirect enum Node { /// Try to match each node in order /// /// ... | ... | ... @@ -347,65 +347,10 @@ extension DSLTree { var captureStructure: CaptureStructure { // TODO: nesting var constructor = CaptureStructure.Constructor(.flatten) - return root._captureStructure(&constructor) + return _Tree(root)._captureStructure(&constructor) } } extension DSLTree.Node { - @_spi(RegexBuilder) - public func _captureStructure( - _ constructor: inout CaptureStructure.Constructor - ) -> CaptureStructure { - switch self { - case let .orderedChoice(children): - return constructor.alternating(children) - - case let .concatenation(children): - return constructor.concatenating(children) - - case let .capture(name, _, child): - if let type = child.valueCaptureType { - return constructor.capturing( - name: name, child, withType: type) - } - return constructor.capturing(name: name, child) - - case let .nonCapturingGroup(kind, child): - assert(!kind.ast.isCapturing) - return constructor.grouping(child, as: kind.ast) - - case let .conditional(cond, trueBranch, falseBranch): - return constructor.condition( - cond.ast, - trueBranch: trueBranch, - falseBranch: falseBranch) - - case let .quantification(amount, _, child): - return constructor.quantifying( - child, amount: amount.ast) - - case let .regexLiteral(re): - // TODO: Force a re-nesting? - return re.ast._captureStructure(&constructor) - - case let .absentFunction(abs): - return constructor.absent(abs.ast.kind) - - case let .convertedRegexLiteral(n, _): - // TODO: Switch nesting strategy? - return n._captureStructure(&constructor) - - case .matcher: - return .empty - - case .transform(_, let child): - return child._captureStructure(&constructor) - - case .customCharacterClass, .atom, .trivia, .empty, - .quotedLiteral, .consumer, .characterPredicate: - return .empty - } - } - /// For typed capture-producing nodes, the type produced. var valueCaptureType: AnyType? { switch self { @@ -512,9 +457,99 @@ public struct CaptureTransform: Hashable, CustomStringConvertible { // // These wrapper types are required because even @_spi-marked public APIs can't // include symbols from implementation-only dependencies. -internal protocol _DSLTreeNode: _TreeNode {} extension DSLTree { + /// Presents a wrapped version of `DSLTree.Node` that can provide an internal + /// `_TreeNode` conformance. + struct _Tree: _TreeNode { + var node: DSLTree.Node + + init(_ node: DSLTree.Node) { + self.node = node + } + + var children: [_Tree]? { + switch node { + + case let .orderedChoice(v): return v.map(_Tree.init) + case let .concatenation(v): return v.map(_Tree.init) + + case let .convertedRegexLiteral(n, _): + // Treat this transparently + return _Tree(n).children + + case let .capture(_, _, n): return [_Tree(n)] + case let .nonCapturingGroup(_, n): return [_Tree(n)] + case let .transform(_, n): return [_Tree(n)] + case let .quantification(_, _, n): return [_Tree(n)] + + case let .conditional(_, t, f): return [_Tree(t), _Tree(f)] + + case .trivia, .empty, .quotedLiteral, .regexLiteral, + .consumer, .matcher, .characterPredicate, + .customCharacterClass, .atom: + return [] + + case let .absentFunction(abs): + return abs.ast.children.map(\.dslTreeNode).map(_Tree.init) + } + } + + func _captureStructure( + _ constructor: inout CaptureStructure.Constructor + ) -> CaptureStructure { + switch node { + case let .orderedChoice(children): + return constructor.alternating(children.map(_Tree.init)) + + case let .concatenation(children): + return constructor.concatenating(children.map(_Tree.init)) + + case let .capture(name, _, child): + if let type = child.valueCaptureType { + return constructor.capturing( + name: name, _Tree(child), withType: type) + } + return constructor.capturing(name: name, _Tree(child)) + + case let .nonCapturingGroup(kind, child): + assert(!kind.ast.isCapturing) + return constructor.grouping(_Tree(child), as: kind.ast) + + case let .conditional(cond, trueBranch, falseBranch): + return constructor.condition( + cond.ast, + trueBranch: _Tree(trueBranch), + falseBranch: _Tree(falseBranch)) + + case let .quantification(amount, _, child): + return constructor.quantifying( + Self(child), amount: amount.ast) + + case let .regexLiteral(re): + // TODO: Force a re-nesting? + return re.ast._captureStructure(&constructor) + + case let .absentFunction(abs): + return constructor.absent(abs.ast.kind) + + case let .convertedRegexLiteral(n, _): + // TODO: Switch nesting strategy? + return Self(n)._captureStructure(&constructor) + + case .matcher: + return .empty + + case .transform(_, let child): + return Self(child)._captureStructure(&constructor) + + case .customCharacterClass, .atom, .trivia, .empty, + .quotedLiteral, .consumer, .characterPredicate: + return .empty + } + } + } + @_spi(RegexBuilder) public enum _AST { @_spi(RegexBuilder) From 85e6564e195aa5c865c601142444cbafa6352c5d Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Tue, 19 Apr 2022 00:58:00 -0500 Subject: [PATCH 5/5] Eliminate one more straggling public AST type --- Sources/_StringProcessing/_CharacterClassModel.swift | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index a4d2280ec..2debcda9d 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -28,7 +28,7 @@ public struct _CharacterClassModel: Hashable { var isInverted: Bool = false // TODO: Split out builtin character classes into their own type? - public enum Representation: Hashable { + public enum Representation: Hashable { /// Any character case any /// Any grapheme cluster @@ -52,10 +52,14 @@ public struct _CharacterClassModel: Hashable { case custom([CharacterSetComponent]) } - public typealias SetOperator = AST.CustomCharacterClass.SetOp + public enum SetOperator: Hashable { + case subtraction + case intersection + case symmetricDifference + } /// A binary set operation that forms a character class component. - public struct SetOperation: Hashable { + public struct SetOperation: Hashable { var lhs: CharacterSetComponent var op: SetOperator var rhs: CharacterSetComponent @@ -72,7 +76,7 @@ public struct _CharacterClassModel: Hashable { } } - public enum CharacterSetComponent: Hashable { + public enum CharacterSetComponent: Hashable { case character(Character) case range(ClosedRange)