diff --git a/Sources/RegexBuilder/Anchor.swift b/Sources/RegexBuilder/Anchor.swift index 55b554aea..e8cd4ac54 100644 --- a/Sources/RegexBuilder/Anchor.swift +++ b/Sources/RegexBuilder/Anchor.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -import _RegexParser +@_implementationOnly import _RegexParser @_spi(RegexBuilder) import _StringProcessing @available(SwiftStdlib 5.7, *) @@ -31,34 +31,21 @@ public struct Anchor { @available(SwiftStdlib 5.7, *) extension Anchor: RegexComponent { - var astAssertion: AST.Atom.AssertionKind { - if !isInverted { - switch kind { - case .startOfSubject: return .startOfSubject - case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline - case .endOfSubject: return .endOfSubject - case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject - case .textSegmentBoundary: return .textSegment - case .startOfLine: return .startOfLine - case .endOfLine: return .endOfLine - case .wordBoundary: return .wordBoundary - } - } else { - switch kind { - case .startOfSubject: fatalError("Not yet supported") - case .endOfSubjectBeforeNewline: fatalError("Not yet supported") - case .endOfSubject: fatalError("Not yet supported") - case .firstMatchingPositionInSubject: fatalError("Not yet supported") - case .textSegmentBoundary: return .notTextSegment - case .startOfLine: fatalError("Not yet supported") - case .endOfLine: fatalError("Not yet supported") - case .wordBoundary: return .notWordBoundary - } + var baseAssertion: DSLTree._AST.AssertionKind { + switch kind { + case .startOfSubject: return .startOfSubject(isInverted) + case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted) + case .endOfSubject: return .endOfSubject(isInverted) + case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted) + case .textSegmentBoundary: return .textSegmentBoundary(isInverted) + case .startOfLine: return .startOfLine(isInverted) + case .endOfLine: return .endOfLine(isInverted) + case .wordBoundary: return .wordBoundary(isInverted) } } public var regex: Regex { - Regex(node: .atom(.assertion(astAssertion))) + Regex(node: .atom(.assertion(baseAssertion))) } } diff --git a/Sources/RegexBuilder/CharacterClass.swift b/Sources/RegexBuilder/CharacterClass.swift index d163c336b..0087d734a 100644 --- a/Sources/RegexBuilder/CharacterClass.swift +++ b/Sources/RegexBuilder/CharacterClass.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -import _RegexParser +@_implementationOnly import _RegexParser @_spi(RegexBuilder) import _StringProcessing @available(SwiftStdlib 5.7, *) @@ -21,19 +21,10 @@ public struct CharacterClass { } init(unconverted model: _CharacterClassModel) { - // FIXME: Implement in DSLTree instead of wrapping an AST atom - switch model.makeAST() { - case .atom(let atom): - self.ccc = .init(members: [.atom(.unconverted(atom))]) - default: - fatalError("Unsupported _CharacterClassModel") + guard let ccc = model.makeDSLTreeCharacterClass() else { + fatalError("Unsupported character class") } - } - - init(property: AST.Atom.CharacterProperty) { - // FIXME: Implement in DSLTree instead of wrapping an AST atom - let astAtom = AST.Atom(.property(property), .fake) - self.ccc = .init(members: [.atom(.unconverted(astAtom))]) + self.ccc = ccc } } @@ -119,11 +110,7 @@ extension RegexComponent where Self == CharacterClass { @available(SwiftStdlib 5.7, *) extension CharacterClass { public static func generalCategory(_ category: Unicode.GeneralCategory) -> CharacterClass { - guard let extendedCategory = category.extendedGeneralCategory else { - fatalError("Unexpected general category") - } - return CharacterClass(property: - .init(.generalCategory(extendedCategory), isInverted: false, isPOSIX: false)) + return CharacterClass(.generalCategory(category)) } } @@ -144,44 +131,6 @@ public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass { return CharacterClass(ccc) } -extension Unicode.GeneralCategory { - var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? { - switch self { - case .uppercaseLetter: return .uppercaseLetter - case .lowercaseLetter: return .lowercaseLetter - case .titlecaseLetter: return .titlecaseLetter - case .modifierLetter: return .modifierLetter - case .otherLetter: return .otherLetter - case .nonspacingMark: return .nonspacingMark - case .spacingMark: return .spacingMark - case .enclosingMark: return .enclosingMark - case .decimalNumber: return .decimalNumber - case .letterNumber: return .letterNumber - case .otherNumber: return .otherNumber - case .connectorPunctuation: return .connectorPunctuation - case .dashPunctuation: return .dashPunctuation - case .openPunctuation: return .openPunctuation - case .closePunctuation: return .closePunctuation - case .initialPunctuation: return .initialPunctuation - case .finalPunctuation: return .finalPunctuation - case .otherPunctuation: return .otherPunctuation - case .mathSymbol: return .mathSymbol - case .currencySymbol: return .currencySymbol - case .modifierSymbol: return .modifierSymbol - case .otherSymbol: return .otherSymbol - case .spaceSeparator: return .spaceSeparator - case .lineSeparator: return .lineSeparator - case .paragraphSeparator: return .paragraphSeparator - case .control: return .control - case .format: return .format - case .surrogate: return .surrogate - case .privateUse: return .privateUse - case .unassigned: return .unassigned - @unknown default: return nil - } - } -} - // MARK: - Set algebra methods @available(SwiftStdlib 5.7, *) diff --git a/Sources/RegexBuilder/DSL.swift b/Sources/RegexBuilder/DSL.swift index 86ec0bee5..97bc35154 100644 --- a/Sources/RegexBuilder/DSL.swift +++ b/Sources/RegexBuilder/DSL.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -import _RegexParser +@_implementationOnly import _RegexParser @_spi(RegexBuilder) import _StringProcessing @available(SwiftStdlib 5.7, *) @@ -105,7 +105,7 @@ public struct QuantificationBehavior { var kind: Kind - internal var astKind: AST.Quantification.Kind { + internal var astKind: DSLTree._AST.QuantificationKind { switch kind { case .eagerly: return .eager case .reluctantly: return .reluctant @@ -136,13 +136,13 @@ extension DSLTree.Node { return .quantification(.oneOrMore, kind, node) case _ where range.count == 1: // ..<1 or ...0 or any range with count == 1 // Note: `behavior` is ignored in this case - return .quantification(.exactly(.init(faking: range.lowerBound)), .default, node) + return .quantification(.exactly(range.lowerBound), .default, node) case (0, _): // 0.. 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -720,7 +719,7 @@ extension Repeat { ) where RegexOutput == Substring { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -835,7 +834,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?), Component.RegexOutput == (W, C0) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -845,7 +844,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?), Component.RegexOutput == (W, C0) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -958,7 +957,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?), Component.RegexOutput == (W, C0, C1) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -968,7 +967,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?), Component.RegexOutput == (W, C0, C1) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1081,7 +1080,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?), Component.RegexOutput == (W, C0, C1, C2) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1091,7 +1090,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?), Component.RegexOutput == (W, C0, C1, C2) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1204,7 +1203,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?), Component.RegexOutput == (W, C0, C1, C2, C3) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1214,7 +1213,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?), Component.RegexOutput == (W, C0, C1, C2, C3) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1327,7 +1326,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C0, C1, C2, C3, C4) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1337,7 +1336,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C0, C1, C2, C3, C4) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1450,7 +1449,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1460,7 +1459,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1573,7 +1572,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1583,7 +1582,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1696,7 +1695,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1706,7 +1705,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1819,7 +1818,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7, C8) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1829,7 +1828,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7, C8) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1942,7 +1941,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } @available(SwiftStdlib 5.7, *) @@ -1952,7 +1951,7 @@ extension Repeat { ) where RegexOutput == (Substring, C0?, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } @available(SwiftStdlib 5.7, *) diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index d1cb41810..50f09700e 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -121,7 +121,6 @@ struct VariadicsGenerator: ParsableCommand { // BEGIN AUTO-GENERATED CONTENT - import _RegexParser @_spi(RegexBuilder) import _StringProcessing @@ -490,7 +489,7 @@ struct VariadicsGenerator: ParsableCommand { ) \(params.whereClauseForInit) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root)) + self.init(node: .quantification(.exactly(count), .default, component.regex.root)) } \(defaultAvailableAttr) @@ -501,7 +500,7 @@ struct VariadicsGenerator: ParsableCommand { ) \(params.whereClauseForInit) { assert(count > 0, "Must specify a positive count") // TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)` - self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root)) + self.init(node: .quantification(.exactly(count), .default, component().regex.root)) } \(defaultAvailableAttr) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index b6f9b4732..02cc1ccb6 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -26,22 +26,22 @@ extension Compiler.ByteCodeGen { try emitScalar(s) case let .assertion(kind): - try emitAssertion(kind) + try emitAssertion(kind.ast) case let .backreference(ref): - try emitBackreference(ref) + try emitBackreference(ref.ast) case let .symbolicReference(id): builder.buildUnresolvedReference(id: id) case let .changeMatchingOptions(optionSequence): - options.apply(optionSequence) + options.apply(optionSequence.ast) case let .unconverted(astAtom): - if let consumer = try astAtom.generateConsumer(options) { + if let consumer = try astAtom.ast.generateConsumer(options) { builder.buildConsume(by: consumer) } else { - throw Unsupported("\(astAtom._patternBase)") + throw Unsupported("\(astAtom.ast._patternBase)") } } } @@ -370,9 +370,9 @@ extension Compiler.ByteCodeGen { let updatedKind: AST.Quantification.Kind switch kind { case .explicit(let kind): - updatedKind = kind + updatedKind = kind.ast case .syntax(let kind): - updatedKind = kind.applying(options) + updatedKind = kind.ast.applying(options) case .default: updatedKind = options.isReluctantByDefault ? .reluctant @@ -604,13 +604,13 @@ extension Compiler.ByteCodeGen { } case let .nonCapturingGroup(kind, child): - try emitNoncapturingGroup(kind, child) + try emitNoncapturingGroup(kind.ast, child) case .conditional: throw Unsupported("Conditionals") case let .quantification(amt, kind, child): - try emitQuantification(amt, kind, child) + try emitQuantification(amt.ast, kind, child) case let .customCharacterClass(ccc): if ccc.containsAny { @@ -646,7 +646,7 @@ extension Compiler.ByteCodeGen { } case let .regexLiteral(l): - try emitNode(l.dslTreeNode) + try emitNode(l.ast.dslTreeNode) case let .convertedRegexLiteral(n, _): try emitNode(n) diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index ecb7d1356..58a7b551c 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -105,7 +105,7 @@ extension DSLTree.Atom { return nil case let .unconverted(a): - return try a.generateConsumer(opts) + return try a.ast.generateConsumer(opts) } } diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 4d135898b..91626eb5c 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -68,7 +68,7 @@ extension PrettyPrinter { private mutating func printAsPattern( convertedFromAST node: DSLTree.Node ) { - if patternBackoff(node) { + if patternBackoff(DSLTree._Tree(node)) { printBackoff(node) return } @@ -90,7 +90,7 @@ extension PrettyPrinter { } case let .nonCapturingGroup(kind, child): - let kind = kind._patternBase + let kind = kind.ast._patternBase printBlock("Group(\(kind))") { printer in printer.printAsPattern(convertedFromAST: child) } @@ -108,8 +108,8 @@ extension PrettyPrinter { print("/* TODO: conditional */") case let .quantification(amount, kind, child): - let amount = amount._patternBase - let kind = kind._patternBase + let amount = amount.ast._patternBase + let kind = (kind.ast ?? .eager)._patternBase printBlock("\(amount)(\(kind))") { printer in printer.printAsPattern(convertedFromAST: child) } @@ -129,7 +129,7 @@ extension PrettyPrinter { case let .unconverted(a): // TODO: is this always right? // TODO: Convert built-in character classes - print(a._patternBase) + print(a.ast._patternBase) case .assertion: print("/* TODO: assertions */") @@ -400,11 +400,6 @@ extension AST.Quantification.Kind { extension DSLTree.QuantificationKind { var _patternBase: String { - switch self { - case .explicit(let kind), .syntax(let kind): - return kind._patternBase - case .default: - return ".eager" - } + (ast ?? .eager)._patternBase } } diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index 8acbd3b1b..ef98a7b8f 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -40,7 +40,7 @@ extension AST.Node { // TODO: Should we do this for the // single-concatenation child too, or should? // we wrap _that_? - return .convertedRegexLiteral(node, self) + return .convertedRegexLiteral(node, .init(ast: self)) } // Convert the top-level node without wrapping @@ -111,19 +111,19 @@ extension AST.Node { case .balancedCapture: throw Unsupported("TODO: balanced captures") default: - return .nonCapturingGroup(v.kind.value, child) + return .nonCapturingGroup(.init(ast: v.kind.value), child) } case let .conditional(v): let trueBranch = v.trueBranch.dslTreeNode let falseBranch = v.falseBranch.dslTreeNode return .conditional( - v.condition.kind, trueBranch, falseBranch) + .init(ast: v.condition.kind), trueBranch, falseBranch) case let .quantification(v): let child = v.child.dslTreeNode return .quantification( - v.amount.value, .syntax(v.kind.value), child) + .init(ast: v.amount.value), .syntax(.init(ast: v.kind.value)), child) case let .quote(v): return .quotedLiteral(v.literal) @@ -140,9 +140,9 @@ extension AST.Node { case .empty(_): return .empty - case let .absentFunction(a): + case let .absentFunction(abs): // TODO: What should this map to? - return .absentFunction(a) + return .absentFunction(.init(ast: abs)) } } @@ -202,20 +202,20 @@ extension AST.CustomCharacterClass { extension AST.Atom { var dslTreeAtom: DSLTree.Atom { if let kind = assertionKind { - return .assertion(kind) + return .assertion(.init(ast: kind)) } switch self.kind { case let .char(c): return .char(c) case let .scalar(s): return .scalar(s) case .any: return .any - case let .backreference(r): return .backreference(r) - case let .changeMatchingOptions(seq): return .changeMatchingOptions(seq) + case let .backreference(r): return .backreference(.init(ast: r)) + case let .changeMatchingOptions(seq): return .changeMatchingOptions(.init(ast: seq)) case .escaped(let c) where c.scalarValue != nil: return .scalar(c.scalarValue!) - default: return .unconverted(self) + default: return .unconverted(.init(ast: self)) } } } diff --git a/Sources/_StringProcessing/Regex/DSLConsumers.swift b/Sources/_StringProcessing/Regex/DSLConsumers.swift index ea46c789b..eb8ace8d3 100644 --- a/Sources/_StringProcessing/Regex/DSLConsumers.swift +++ b/Sources/_StringProcessing/Regex/DSLConsumers.swift @@ -21,8 +21,7 @@ public protocol CustomMatchingRegexComponent: RegexComponent { @available(SwiftStdlib 5.7, *) extension CustomMatchingRegexComponent { public var regex: Regex { - - let node: DSLTree.Node = .matcher(.init(RegexOutput.self), { input, index, bounds in + let node: DSLTree.Node = .matcher(RegexOutput.self, { input, index, bounds in try match(input, startingAt: index, in: bounds) }) return Regex(node: node) diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 51f5ea36f..ce5beeaca 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -24,7 +24,7 @@ public struct DSLTree { extension DSLTree { @_spi(RegexBuilder) - public indirect enum Node: _TreeNode { + public indirect enum Node { /// Try to match each node in order /// /// ... | ... | ... @@ -42,7 +42,7 @@ extension DSLTree { name: String? = nil, reference: ReferenceID? = nil, Node) /// Match a (non-capturing) subpattern / group - case nonCapturingGroup(AST.Group.Kind, Node) + case nonCapturingGroup(_AST.GroupKind, Node) // TODO: Consider splitting off grouped conditions, or have // our own kind @@ -52,10 +52,10 @@ extension DSLTree { /// (?(cond) true-branch | false-branch) /// case conditional( - AST.Conditional.Condition.Kind, Node, Node) + _AST.ConditionKind, Node, Node) case quantification( - AST.Quantification.Amount, + _AST.QuantificationAmount, QuantificationKind, Node) @@ -74,19 +74,19 @@ extension DSLTree { case quotedLiteral(String) /// An embedded literal - case regexLiteral(AST.Node) + case regexLiteral(_AST.ASTNode) // TODO: What should we do here? /// /// TODO: Consider splitting off expression functions, or have our own kind - case absentFunction(AST.AbsentFunction) + case absentFunction(_AST.AbsentFunction) // MARK: - Tree conversions /// The target of AST conversion. /// /// Keeps original AST around for rich syntactic and source information - case convertedRegexLiteral(Node, AST.Node) + case convertedRegexLiteral(Node, _AST.ASTNode) // MARK: - Extensibility points @@ -95,7 +95,7 @@ extension DSLTree { case consumer(_ConsumerInterface) - case matcher(AnyType, _MatcherInterface) + case matcher(Any.Type, _MatcherInterface) // TODO: Would this just boil down to a consumer? case characterPredicate(_CharacterPredicateInterface) @@ -108,9 +108,17 @@ extension DSLTree { /// The default quantification kind, as set by options. case `default` /// An explicitly chosen kind, overriding any options. - case explicit(AST.Quantification.Kind) + case explicit(_AST.QuantificationKind) /// A kind set via syntax, which can be affected by options. - case syntax(AST.Quantification.Kind) + case syntax(_AST.QuantificationKind) + + var ast: AST.Quantification.Kind? { + switch self { + case .default: return nil + case .explicit(let kind), .syntax(let kind): + return kind.ast + } + } } @_spi(RegexBuilder) @@ -134,6 +142,12 @@ extension DSLTree { self.isInverted = isInverted } + public static func generalCategory(_ category: Unicode.GeneralCategory) -> Self { + let property = AST.Atom.CharacterProperty(.generalCategory(category.extendedGeneralCategory!), isInverted: false, isPOSIX: false) + let astAtom = AST.Atom(.property(property), .fake) + return .init(members: [.atom(.unconverted(.init(ast: astAtom)))]) + } + public var inverted: CustomCharacterClass { var result = self result.isInverted.toggle() @@ -162,13 +176,51 @@ extension DSLTree { case scalar(Unicode.Scalar) case any - case assertion(AST.Atom.AssertionKind) - case backreference(AST.Reference) + case assertion(_AST.AssertionKind) + case backreference(_AST.Reference) case symbolicReference(ReferenceID) - case changeMatchingOptions(AST.MatchingOptionSequence) + case changeMatchingOptions(_AST.MatchingOptionSequence) + + case unconverted(_AST.Atom) + } +} - case unconverted(AST.Atom) +extension Unicode.GeneralCategory { + var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? { + switch self { + case .uppercaseLetter: return .uppercaseLetter + case .lowercaseLetter: return .lowercaseLetter + case .titlecaseLetter: return .titlecaseLetter + case .modifierLetter: return .modifierLetter + case .otherLetter: return .otherLetter + case .nonspacingMark: return .nonspacingMark + case .spacingMark: return .spacingMark + case .enclosingMark: return .enclosingMark + case .decimalNumber: return .decimalNumber + case .letterNumber: return .letterNumber + case .otherNumber: return .otherNumber + case .connectorPunctuation: return .connectorPunctuation + case .dashPunctuation: return .dashPunctuation + case .openPunctuation: return .openPunctuation + case .closePunctuation: return .closePunctuation + case .initialPunctuation: return .initialPunctuation + case .finalPunctuation: return .finalPunctuation + case .otherPunctuation: return .otherPunctuation + case .mathSymbol: return .mathSymbol + case .currencySymbol: return .currencySymbol + case .modifierSymbol: return .modifierSymbol + case .otherSymbol: return .otherSymbol + case .spaceSeparator: return .spaceSeparator + case .lineSeparator: return .lineSeparator + case .paragraphSeparator: return .paragraphSeparator + case .control: return .control + case .format: return .format + case .surrogate: return .surrogate + case .privateUse: return .privateUse + case .unassigned: return .unassigned + @unknown default: return nil + } } } @@ -226,8 +278,8 @@ extension DSLTree.Node { .customCharacterClass, .atom: return [] - case let .absentFunction(a): - return a.children.map(\.dslTreeNode) + case let .absentFunction(abs): + return abs.ast.children.map(\.dslTreeNode) } } } @@ -235,8 +287,8 @@ extension DSLTree.Node { extension DSLTree.Node { var astNode: AST.Node? { switch self { - case let .regexLiteral(ast): return ast - case let .convertedRegexLiteral(_, ast): return ast + case let .regexLiteral(literal): return literal.ast + case let .convertedRegexLiteral(_, literal): return literal.ast default: return nil } } @@ -280,9 +332,9 @@ extension DSLTree.Node { case .capture: return true case let .regexLiteral(re): - return re.hasCapture + return re.ast.hasCapture case let .convertedRegexLiteral(n, re): - assert(n.hasCapture == re.hasCapture) + assert(n.hasCapture == re.ast.hasCapture) return n.hasCapture default: @@ -295,70 +347,15 @@ extension DSLTree { var captureStructure: CaptureStructure { // TODO: nesting var constructor = CaptureStructure.Constructor(.flatten) - return root._captureStructure(&constructor) + return _Tree(root)._captureStructure(&constructor) } } extension DSLTree.Node { - @_spi(RegexBuilder) - public func _captureStructure( - _ constructor: inout CaptureStructure.Constructor - ) -> CaptureStructure { - switch self { - case let .orderedChoice(children): - return constructor.alternating(children) - - case let .concatenation(children): - return constructor.concatenating(children) - - case let .capture(name, _, child): - if let type = child.valueCaptureType { - return constructor.capturing( - name: name, child, withType: type) - } - return constructor.capturing(name: name, child) - - case let .nonCapturingGroup(kind, child): - assert(!kind.isCapturing) - return constructor.grouping(child, as: kind) - - case let .conditional(cond, trueBranch, falseBranch): - return constructor.condition( - cond, - trueBranch: trueBranch, - falseBranch: falseBranch) - - case let .quantification(amount, _, child): - return constructor.quantifying( - child, amount: amount) - - case let .regexLiteral(re): - // TODO: Force a re-nesting? - return re._captureStructure(&constructor) - - case let .absentFunction(abs): - return constructor.absent(abs.kind) - - case let .convertedRegexLiteral(n, _): - // TODO: Switch nesting strategy? - return n._captureStructure(&constructor) - - case .matcher: - return .empty - - case .transform(_, let child): - return child._captureStructure(&constructor) - - case .customCharacterClass, .atom, .trivia, .empty, - .quotedLiteral, .consumer, .characterPredicate: - return .empty - } - } - /// For typed capture-producing nodes, the type produced. var valueCaptureType: AnyType? { switch self { case let .matcher(t, _): - return t + return AnyType(t) case let .transform(t, _): return AnyType(t.resultType) default: return nil @@ -455,3 +452,225 @@ public struct CaptureTransform: Hashable, CustomStringConvertible { "" } } + +// MARK: AST wrapper types +// +// These wrapper types are required because even @_spi-marked public APIs can't +// include symbols from implementation-only dependencies. + +extension DSLTree { + /// Presents a wrapped version of `DSLTree.Node` that can provide an internal + /// `_TreeNode` conformance. + struct _Tree: _TreeNode { + var node: DSLTree.Node + + init(_ node: DSLTree.Node) { + self.node = node + } + + var children: [_Tree]? { + switch node { + + case let .orderedChoice(v): return v.map(_Tree.init) + case let .concatenation(v): return v.map(_Tree.init) + + case let .convertedRegexLiteral(n, _): + // Treat this transparently + return _Tree(n).children + + case let .capture(_, _, n): return [_Tree(n)] + case let .nonCapturingGroup(_, n): return [_Tree(n)] + case let .transform(_, n): return [_Tree(n)] + case let .quantification(_, _, n): return [_Tree(n)] + + case let .conditional(_, t, f): return [_Tree(t), _Tree(f)] + + case .trivia, .empty, .quotedLiteral, .regexLiteral, + .consumer, .matcher, .characterPredicate, + .customCharacterClass, .atom: + return [] + + case let .absentFunction(abs): + return abs.ast.children.map(\.dslTreeNode).map(_Tree.init) + } + } + + func _captureStructure( + _ constructor: inout CaptureStructure.Constructor + ) -> CaptureStructure { + switch node { + case let .orderedChoice(children): + return constructor.alternating(children.map(_Tree.init)) + + case let .concatenation(children): + return constructor.concatenating(children.map(_Tree.init)) + + case let .capture(name, _, child): + if let type = child.valueCaptureType { + return constructor.capturing( + name: name, _Tree(child), withType: type) + } + return constructor.capturing(name: name, _Tree(child)) + + case let .nonCapturingGroup(kind, child): + assert(!kind.ast.isCapturing) + return constructor.grouping(_Tree(child), as: kind.ast) + + case let .conditional(cond, trueBranch, falseBranch): + return constructor.condition( + cond.ast, + trueBranch: _Tree(trueBranch), + falseBranch: _Tree(falseBranch)) + + case let .quantification(amount, _, child): + return constructor.quantifying( + Self(child), amount: amount.ast) + + case let .regexLiteral(re): + // TODO: Force a re-nesting? + return re.ast._captureStructure(&constructor) + + case let .absentFunction(abs): + return constructor.absent(abs.ast.kind) + + case let .convertedRegexLiteral(n, _): + // TODO: Switch nesting strategy? + return Self(n)._captureStructure(&constructor) + + case .matcher: + return .empty + + case .transform(_, let child): + return Self(child)._captureStructure(&constructor) + + case .customCharacterClass, .atom, .trivia, .empty, + .quotedLiteral, .consumer, .characterPredicate: + return .empty + } + } + } + + @_spi(RegexBuilder) + public enum _AST { + @_spi(RegexBuilder) + public struct GroupKind { + internal var ast: AST.Group.Kind + + public static var atomicNonCapturing: Self { + .init(ast: .atomicNonCapturing) + } + public static var lookahead: Self { + .init(ast: .lookahead) + } + public static var negativeLookahead: Self { + .init(ast: .negativeLookahead) + } + } + + @_spi(RegexBuilder) + public struct ConditionKind { + internal var ast: AST.Conditional.Condition.Kind + } + + @_spi(RegexBuilder) + public struct QuantificationKind { + internal var ast: AST.Quantification.Kind + + public static var eager: Self { + .init(ast: .eager) + } + public static var reluctant: Self { + .init(ast: .reluctant) + } + public static var possessive: Self { + .init(ast: .possessive) + } + } + + @_spi(RegexBuilder) + public struct QuantificationAmount { + internal var ast: AST.Quantification.Amount + + public static var zeroOrMore: Self { + .init(ast: .zeroOrMore) + } + public static var oneOrMore: Self { + .init(ast: .oneOrMore) + } + public static var zeroOrOne: Self { + .init(ast: .zeroOrOne) + } + public static func exactly(_ n: Int) -> Self { + .init(ast: .exactly(.init(faking: n))) + } + public static func nOrMore(_ n: Int) -> Self { + .init(ast: .nOrMore(.init(faking: n))) + } + public static func upToN(_ n: Int) -> Self { + .init(ast: .upToN(.init(faking: n))) + } + public static func range(_ lower: Int, _ upper: Int) -> Self { + .init(ast: .range(.init(faking: lower), .init(faking: upper))) + } + } + + @_spi(RegexBuilder) + public struct ASTNode { + internal var ast: AST.Node + } + + @_spi(RegexBuilder) + public struct AbsentFunction { + internal var ast: AST.AbsentFunction + } + + @_spi(RegexBuilder) + public struct AssertionKind { + internal var ast: AST.Atom.AssertionKind + + public static func startOfSubject(_ inverted: Bool = false) -> Self { + .init(ast: .startOfSubject) + } + public static func endOfSubjectBeforeNewline(_ inverted: Bool = false) -> Self { + .init(ast: .endOfSubjectBeforeNewline) + } + public static func endOfSubject(_ inverted: Bool = false) -> Self { + .init(ast: .endOfSubject) + } + public static func firstMatchingPositionInSubject(_ inverted: Bool = false) -> Self { + .init(ast: .firstMatchingPositionInSubject) + } + public static func textSegmentBoundary(_ inverted: Bool = false) -> Self { + inverted + ? .init(ast: .notTextSegment) + : .init(ast: .textSegment) + } + public static func startOfLine(_ inverted: Bool = false) -> Self { + .init(ast: .startOfLine) + } + public static func endOfLine(_ inverted: Bool = false) -> Self { + .init(ast: .endOfLine) + } + public static func wordBoundary(_ inverted: Bool = false) -> Self { + inverted + ? .init(ast: .notWordBoundary) + : .init(ast: .wordBoundary) + } + } + + @_spi(RegexBuilder) + public struct Reference { + internal var ast: AST.Reference + } + + @_spi(RegexBuilder) + public struct MatchingOptionSequence { + internal var ast: AST.MatchingOptionSequence + } + + @_spi(RegexBuilder) + public struct Atom { + internal var ast: AST.Atom + } + } +} diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift index 623589b54..a93421f4f 100644 --- a/Sources/_StringProcessing/Regex/Options.swift +++ b/Sources/_StringProcessing/Regex/Options.swift @@ -195,6 +195,6 @@ extension RegexComponent { ? AST.MatchingOptionSequence(adding: [.init(option, location: .fake)]) : AST.MatchingOptionSequence(removing: [.init(option, location: .fake)]) return Regex(node: .nonCapturingGroup( - .changeMatchingOptions(sequence), regex.root)) + .init(ast: .changeMatchingOptions(sequence)), regex.root)) } } diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index c9762f00e..2debcda9d 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -28,7 +28,7 @@ public struct _CharacterClassModel: Hashable { var isInverted: Bool = false // TODO: Split out builtin character classes into their own type? - public enum Representation: Hashable { + public enum Representation: Hashable { /// Any character case any /// Any grapheme cluster @@ -52,10 +52,14 @@ public struct _CharacterClassModel: Hashable { case custom([CharacterSetComponent]) } - public typealias SetOperator = AST.CustomCharacterClass.SetOp + public enum SetOperator: Hashable { + case subtraction + case intersection + case symmetricDifference + } /// A binary set operation that forms a character class component. - public struct SetOperation: Hashable { + public struct SetOperation: Hashable { var lhs: CharacterSetComponent var op: SetOperator var rhs: CharacterSetComponent @@ -72,7 +76,7 @@ public struct _CharacterClassModel: Hashable { } } - public enum CharacterSetComponent: Hashable { + public enum CharacterSetComponent: Hashable { case character(Character) case range(ClosedRange) @@ -294,7 +298,17 @@ extension _CharacterClassModel: CustomStringConvertible { } extension _CharacterClassModel { - public func makeAST() -> AST.Node? { + public func makeDSLTreeCharacterClass() -> DSLTree.CustomCharacterClass? { + // FIXME: Implement in DSLTree instead of wrapping an AST atom + switch makeAST() { + case .atom(let atom): + return .init(members: [.atom(.unconverted(.init(ast: atom)))]) + default: + return nil + } + } + + internal func makeAST() -> AST.Node? { let inv = isInverted func esc(_ b: AST.Atom.EscapedBuiltin) -> AST.Node { @@ -375,7 +389,7 @@ extension DSLTree.Atom { var characterClass: _CharacterClassModel? { switch self { case let .unconverted(a): - return a.characterClass + return a.ast.characterClass default: return nil } diff --git a/Tests/RegexBuilderTests/CustomTests.swift b/Tests/RegexBuilderTests/CustomTests.swift index 0a7d6fc59..bf4489a68 100644 --- a/Tests/RegexBuilderTests/CustomTests.swift +++ b/Tests/RegexBuilderTests/CustomTests.swift @@ -136,7 +136,7 @@ func customTest( class CustomRegexComponentTests: XCTestCase { // TODO: Refactor below into more exhaustive, declarative // tests. - func testCustomRegexComponents() { + func testCustomRegexComponents() throws { customTest( Regex { Numbler() @@ -178,14 +178,13 @@ class CustomRegexComponentTests: XCTestCase { } } - guard let res3 = "ab123c".firstMatch(of: regex3) else { - XCTFail() - return - } + let str = "ab123c" + let res3 = try XCTUnwrap(str.firstMatch(of: regex3)) - XCTAssertEqual(res3.range, "ab123c".index(atOffset: 2)..<"ab123c".index(atOffset: 5)) - XCTAssertEqual(res3.output.0, "123") - XCTAssertEqual(res3.output.1, "123") + let expectedSubstring = str.dropFirst(2).prefix(3) + XCTAssertEqual(res3.range, expectedSubstring.startIndex..