diff --git a/Sources/_RegexParser/Regex/AST/CustomCharClass.swift b/Sources/_RegexParser/Regex/AST/CustomCharClass.swift index d28490f25..087387c1e 100644 --- a/Sources/_RegexParser/Regex/AST/CustomCharClass.swift +++ b/Sources/_RegexParser/Regex/AST/CustomCharClass.swift @@ -51,11 +51,16 @@ extension AST { public var lhs: Atom public var dashLoc: SourceLocation public var rhs: Atom + public var trivia: [AST.Trivia] - public init(_ lhs: Atom, _ dashLoc: SourceLocation, _ rhs: Atom) { + public init( + _ lhs: Atom, _ dashLoc: SourceLocation, _ rhs: Atom, + trivia: [AST.Trivia] + ) { self.lhs = lhs self.dashLoc = dashLoc self.rhs = rhs + self.trivia = trivia } } public enum SetOp: String, Hashable { @@ -95,6 +100,11 @@ extension CustomCC.Member { return false } + public var asTrivia: AST.Trivia? { + guard case .trivia(let t) = self else { return nil } + return t + } + public var isSemantic: Bool { !isTrivia } diff --git a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift index cbc9d5eb2..a6dfa0ce9 100644 --- a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift +++ b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift @@ -149,6 +149,14 @@ extension Source { return result } + /// Perform a lookahead using a temporary source. Within the body of the + /// lookahead, any modifications to the source will not be reflected outside + /// the body. + func lookahead(_ body: (inout Source) throws -> T) rethrows -> T { + var src = self + return try body(&src) + } + /// Attempt to eat the given character, returning its source location if /// successful, `nil` otherwise. mutating func tryEatWithLoc(_ c: Character) -> SourceLocation? { @@ -413,9 +421,7 @@ extension Source { ) throws -> (Located, Located, [AST.Trivia])? { var trivia: [AST.Trivia] = [] - if let t = try lexNonSemanticWhitespace(context: context) { - trivia.append(t) - } + if let t = lexNonSemanticWhitespace(context: context) { trivia.append(t) } let amt: Located? = try recordLoc { src in if src.tryEat("*") { return .zeroOrMore } @@ -424,7 +430,7 @@ extension Source { return try src.tryEating { src in guard src.tryEat("{"), - let range = try src.lexRange(context: context), + let range = try src.lexRange(context: context, trivia: &trivia), src.tryEat("}") else { return nil } return range.value @@ -433,9 +439,7 @@ extension Source { guard let amt = amt else { return nil } // PCRE allows non-semantic whitespace here in extended syntax mode. - if let t = try lexNonSemanticWhitespace(context: context) { - trivia.append(t) - } + if let t = lexNonSemanticWhitespace(context: context) { trivia.append(t) } let kind: Located = recordLoc { src in if src.tryEat("?") { return .reluctant } @@ -452,11 +456,17 @@ extension Source { /// | ExpRange /// ExpRange -> '..<' | '...' /// | '..<' | '...' ? - mutating func lexRange(context: ParsingContext) throws -> Located? { + mutating func lexRange( + context: ParsingContext, trivia: inout [AST.Trivia] + ) throws -> Located? { try recordLoc { src in try src.tryEating { src in + if let t = src.lexWhitespace() { trivia.append(t) } + let lowerOpt = try src.lexNumber() + if let t = src.lexWhitespace() { trivia.append(t) } + // ',' or '...' 
or '..<' or nothing // TODO: We ought to try and consume whitespace here and emit a // diagnostic for the user warning them that it would cause the range to @@ -476,11 +486,15 @@ extension Source { closedRange = nil } + if let t = src.lexWhitespace() { trivia.append(t) } + let upperOpt = try src.lexNumber()?.map { upper in // If we have an open range, the upper bound should be adjusted down. closedRange == true ? upper : upper - 1 } + if let t = src.lexWhitespace() { trivia.append(t) } + switch (lowerOpt, closedRange, upperOpt) { case let (l?, nil, nil): return .exactly(l) @@ -625,11 +639,11 @@ extension Source { /// mutating func lexComment(context: ParsingContext) throws -> AST.Trivia? { let trivia: Located? = try recordLoc { src in - if src.tryEat(sequence: "(?#") { - return try src.expectQuoted(endingWith: ")").value + if !context.isInCustomCharacterClass && src.tryEat(sequence: "(?#") { + return try src.lexUntil(eating: ")").value } if context.experimentalComments, src.tryEat(sequence: "/*") { - return try src.expectQuoted(endingWith: "*/").value + return try src.lexUntil(eating: "*/").value } if context.endOfLineComments, src.tryEat("#") { // Try eat until we either exhaust the input, or hit a newline. Note @@ -667,7 +681,7 @@ extension Source { /// Does nothing unless `SyntaxOptions.nonSemanticWhitespace` is set mutating func lexNonSemanticWhitespace( context: ParsingContext - ) throws -> AST.Trivia? { + ) -> AST.Trivia? { guard context.ignoreWhitespace else { return nil } // FIXME: PCRE only treats space and tab characters as whitespace when @@ -699,7 +713,7 @@ extension Source { if let comment = try lexComment(context: context) { return comment } - if let whitespace = try lexNonSemanticWhitespace(context: context) { + if let whitespace = lexNonSemanticWhitespace(context: context) { return whitespace } return nil @@ -1178,8 +1192,7 @@ extension Source { } } - mutating func lexCustomCCStart( - ) throws -> Located? { + mutating func lexCustomCCStart() -> Located? { recordLoc { src in // Make sure we don't have a POSIX character property. This may require // walking to its ending to make sure we have a closing ':]', as otherwise @@ -1240,8 +1253,9 @@ extension Source { private func canLexPOSIXCharacterProperty() -> Bool { do { - var src = self - return try src.lexPOSIXCharacterProperty() != nil + return try lookahead { src in + try src.lexPOSIXCharacterProperty() != nil + } } catch { // We want to tend on the side of lexing a POSIX character property, so // even if it is invalid in some way (e.g invalid property names), still @@ -1394,10 +1408,11 @@ extension Source { /// Checks whether a numbered reference can be lexed. private func canLexNumberedReference() -> Bool { - var src = self - _ = src.tryEat(anyOf: "+", "-") - guard let next = src.peek() else { return false } - return RadixKind.decimal.characterFilter(next) + lookahead { src in + _ = src.tryEat(anyOf: "+", "-") + guard let next = src.peek() else { return false } + return RadixKind.decimal.characterFilter(next) + } } /// Eat a named reference up to a given closing delimiter. @@ -1587,53 +1602,55 @@ extension Source { /// Whether we can lex a group-like reference after the specifier '(?'. 
private func canLexGroupLikeReference() -> Bool { - var src = self - if src.tryEat("P") { - return src.tryEat(anyOf: "=", ">") != nil - } - if src.tryEat(anyOf: "&", "R") != nil { - return true + lookahead { src in + if src.tryEat("P") { + return src.tryEat(anyOf: "=", ">") != nil + } + if src.tryEat(anyOf: "&", "R") != nil { + return true + } + return src.canLexNumberedReference() } - return src.canLexNumberedReference() } private func canLexMatchingOptionsAsAtom(context: ParsingContext) -> Bool { - var src = self - - // See if we can lex a matching option sequence that terminates in ')'. Such - // a sequence is an atom. If an error is thrown, there are invalid elements - // of the matching option sequence. In such a case, we can lex as a group - // and diagnose the invalid group kind. - guard (try? src.lexMatchingOptionSequence(context: context)) != nil else { - return false + lookahead { src in + // See if we can lex a matching option sequence that terminates in ')'. + // Such a sequence is an atom. If an error is thrown, there are invalid + // elements of the matching option sequence. In such a case, we can lex as + // a group and diagnose the invalid group kind. + guard (try? src.lexMatchingOptionSequence(context: context)) != nil else { + return false + } + return src.tryEat(")") } - return src.tryEat(")") } /// Whether a group specifier should be lexed as an atom instead of a group. private func shouldLexGroupLikeAtom(context: ParsingContext) -> Bool { - var src = self - guard src.tryEat("(") else { return false } + lookahead { src in + guard src.tryEat("(") else { return false } - if src.tryEat("?") { - // The start of a reference '(?P=', '(?R', ... - if src.canLexGroupLikeReference() { return true } + if src.tryEat("?") { + // The start of a reference '(?P=', '(?R', ... + if src.canLexGroupLikeReference() { return true } - // The start of a PCRE callout. - if src.tryEat("C") { return true } + // The start of a PCRE callout. + if src.tryEat("C") { return true } - // The start of an Oniguruma 'of-contents' callout. - if src.tryEat("{") { return true } + // The start of an Oniguruma 'of-contents' callout. + if src.tryEat("{") { return true } - // A matching option atom (?x), (?i), ... - if src.canLexMatchingOptionsAsAtom(context: context) { return true } + // A matching option atom (?x), (?i), ... + if src.canLexMatchingOptionsAsAtom(context: context) { return true } + + return false + } + // The start of a backreference directive or Oniguruma named callout. + if src.tryEat("*") { return true } return false } - // The start of a backreference directive or Oniguruma named callout. - if src.tryEat("*") { return true } - - return false } /// Consume an escaped atom, starting from after the backslash @@ -2022,20 +2039,11 @@ extension Source { return AST.Atom(kind.value, kind.location) } - /// Try to lex the end of a range in a custom character class, which consists - /// of a '-' character followed by an atom. - mutating func lexCustomCharClassRangeEnd( - context: ParsingContext - ) throws -> (dashLoc: SourceLocation, AST.Atom)? { - // Make sure we don't have a binary operator e.g '--', and the '-' is not - // ending the custom character class (in which case it is literal). - guard peekCCBinOp() == nil, !starts(with: "-]"), - let dash = tryEatWithLoc("-"), - let end = try lexAtom(context: context) - else { - return nil - } - return (dash, end) + /// Try to lex the range operator '-' for a custom character class. + mutating func lexCustomCharacterClassRangeOperator() -> SourceLocation? 
{ + // Eat a '-', making sure we don't have a binary op such as '--'. + guard peekCCBinOp() == nil else { return nil } + return tryEatWithLoc("-") } /// Try to consume a newline sequence matching option kind. diff --git a/Sources/_RegexParser/Regex/Parse/Parse.swift b/Sources/_RegexParser/Regex/Parse/Parse.swift index a3ddfb812..84957220c 100644 --- a/Sources/_RegexParser/Regex/Parse/Parse.swift +++ b/Sources/_RegexParser/Regex/Parse/Parse.swift @@ -429,7 +429,7 @@ extension Parser { } // Check if we have the start of a custom character class '['. - if let cccStart = try source.lexCustomCCStart() { + if let cccStart = source.lexCustomCCStart() { return .customCharacterClass( try parseCustomCharacterClass(cccStart)) } @@ -485,16 +485,7 @@ extension Parser { throw Source.LocatedError( ParseError.expectedCustomCharacterClassMembers, start.location) } - - // If we're done, bail early - let setOp = Member.setOperation(members, binOp, rhs) - if source.tryEat("]") { - return CustomCC( - start, [setOp], loc(start.location.start)) - } - - // Otherwise it's just another member to accumulate - members = [setOp] + members = [.setOperation(members, binOp, rhs)] } if members.none(\.isSemantic) { throw Source.LocatedError( @@ -504,45 +495,72 @@ extension Parser { return CustomCC(start, members, loc(start.location.start)) } + mutating func parseCCCMember() throws -> CustomCC.Member? { + guard !source.isEmpty && source.peek() != "]" && source.peekCCBinOp() == nil + else { return nil } + + // Nested custom character class. + if let cccStart = source.lexCustomCCStart() { + return .custom(try parseCustomCharacterClass(cccStart)) + } + + // Quoted sequence. + if let quote = try source.lexQuote(context: context) { + return .quote(quote) + } + + // Lex trivia if we're allowed. + if let trivia = try source.lexTrivia(context: context) { + return .trivia(trivia) + } + + if let atom = try source.lexAtom(context: context) { + return .atom(atom) + } + return nil + } + mutating func parseCCCMembers( into members: inout Array ) throws { // Parse members until we see the end of the custom char class or an // operator. - while !source.isEmpty && source.peek() != "]" && - source.peekCCBinOp() == nil { - - // Nested custom character class. - if let cccStart = try source.lexCustomCCStart() { - members.append(.custom(try parseCustomCharacterClass(cccStart))) - continue - } - - // Quoted sequence. - if let quote = try source.lexQuote(context: context) { - members.append(.quote(quote)) - continue - } - - // Lex non-semantic whitespace if we're allowed. - // TODO: ICU allows end-of-line comments in custom character classes, - // which we ought to support if we want to support multi-line regex. - if let trivia = try source.lexNonSemanticWhitespace(context: context) { - members.append(.trivia(trivia)) - continue - } + while let member = try parseCCCMember() { + members.append(member) + + // If we have an atom, we can try to parse a character class range. Each + // time we parse a component of the range, we append to `members` in case + // it ends up not being a range, and we bail. If we succeed in parsing, we + // remove the intermediate members.
+ if case .atom(let lhs) = member { + let membersBeforeRange = members.count - 1 + + while let t = try source.lexTrivia(context: context) { + members.append(.trivia(t)) + } - guard let atom = try source.lexAtom(context: context) else { break } + guard let dash = source.lexCustomCharacterClassRangeOperator() else { + continue + } + // If we can't parse a range, '-' becomes literal, e.g `[6-]`. + members.append(.atom(.init(.char("-"), dash))) - // Range between atoms. - if let (dashLoc, rhs) = - try source.lexCustomCharClassRangeEnd(context: context) { - members.append(.range(.init(atom, dashLoc, rhs))) - continue + while let t = try source.lexTrivia(context: context) { + members.append(.trivia(t)) + } + guard let rhs = try parseCCCMember() else { continue } + members.append(rhs) + + guard case let .atom(rhs) = rhs else { continue } + + // We've successfully parsed an atom LHS and RHS, so form a range, + // collecting the trivia we've parsed, and replacing the members that + // would have otherwise been added to the custom character class. + let rangeMemberCount = members.count - membersBeforeRange + let trivia = members.suffix(rangeMemberCount).compactMap(\.asTrivia) + members.removeLast(rangeMemberCount) + members.append(.range(.init(lhs, dash, rhs, trivia: trivia))) } - - members.append(.atom(atom)) - continue } } } diff --git a/Sources/_RegexParser/Regex/Printing/DumpAST.swift b/Sources/_RegexParser/Regex/Printing/DumpAST.swift index 110b7b480..10e50d712 100644 --- a/Sources/_RegexParser/Regex/Printing/DumpAST.swift +++ b/Sources/_RegexParser/Regex/Printing/DumpAST.swift @@ -309,7 +309,7 @@ extension AST.CustomCharacterClass: _ASTNode { // comparisons of dumped output in tests. // TODO: We should eventually have some way of filtering out trivia for // tests, so that it can appear in regular dumps. - return "customCharacterClass(\(strippingTriviaShallow.members))" + return "customCharacterClass(inverted: \(isInverted), \(strippingTriviaShallow.members))" } } diff --git a/Sources/_StringProcessing/Utility/ASTBuilder.swift b/Sources/_StringProcessing/Utility/ASTBuilder.swift index 78477e2b5..49a08430d 100644 --- a/Sources/_StringProcessing/Utility/ASTBuilder.swift +++ b/Sources/_StringProcessing/Utility/ASTBuilder.swift @@ -319,6 +319,14 @@ func charClass( return .custom(cc) } +func setOp( + _ lhs: AST.CustomCharacterClass.Member..., + op: AST.CustomCharacterClass.SetOp, + _ rhs: AST.CustomCharacterClass.Member... 
+) -> AST.CustomCharacterClass.Member { + .setOperation(lhs, .init(faking: op), rhs) +} + func quote(_ s: String) -> AST.Node { .quote(.init(s, .fake)) } @@ -408,7 +416,7 @@ func prop_m( func range_m( _ lower: AST.Atom, _ upper: AST.Atom ) -> AST.CustomCharacterClass.Member { - .range(.init(lower, .fake, upper)) + .range(.init(lower, .fake, upper, trivia: [])) } func range_m( _ lower: AST.Atom.Kind, _ upper: AST.Atom.Kind diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 4ae489300..985fb70c6 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -341,8 +341,12 @@ extension RegexTests { firstMatchTest( #"a{1,2}"#, input: "123aaaxyz", match: "aa") + firstMatchTest( + #"a{ 1 , 2 }"#, input: "123aaaxyz", match: "aa") firstMatchTest( #"a{,2}"#, input: "123aaaxyz", match: "") + firstMatchTest( + #"a{ , 2 }"#, input: "123aaaxyz", match: "") firstMatchTest( #"a{,2}x"#, input: "123aaaxyz", match: "aax") firstMatchTest( @@ -351,6 +355,8 @@ extension RegexTests { #"a{2,}"#, input: "123aaaxyz", match: "aaa") firstMatchTest( #"a{1}"#, input: "123aaaxyz", match: "a") + firstMatchTest( + #"a{ 1 }"#, input: "123aaaxyz", match: "a") firstMatchTest( #"a{1,2}?"#, input: "123aaaxyz", match: "a") firstMatchTest( @@ -588,6 +594,8 @@ extension RegexTests { firstMatchTest("[a-z]", input: "123ABCxyz", match: "x") firstMatchTest("[a-z]", input: "123-abcxyz", match: "a") + firstMatchTest("(?x)[ a - z ]+", input: " 123-abcxyz", match: "abcxyz") + // Character class subtraction firstMatchTest("[a-d--a-c]", input: "123abcdxyz", match: "d") diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index e36d84f60..6269cdff6 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -517,14 +517,15 @@ extension RegexTests { "[a-b-c]", charClass(range_m("a", "b"), "-", "c")) parseTest("[-a-]", charClass("-", "a", "-")) + parseTest("[[a]-]", charClass(charClass("a"), "-")) + parseTest("[[a]-b]", charClass(charClass("a"), "-", "b")) parseTest("[a-z]", charClass(range_m("a", "z"))) parseTest("[a-a]", charClass(range_m("a", "a"))) parseTest("[B-a]", charClass(range_m("B", "a"))) - // FIXME: AST builder helpers for custom char class types parseTest("[a-d--a-c]", charClass( - .setOperation([range_m("a", "d")], .init(faking: .subtraction), [range_m("a", "c")]) + setOp(range_m("a", "d"), op: .subtraction, range_m("a", "c")) )) parseTest("[-]", charClass("-")) @@ -680,34 +681,45 @@ extension RegexTests { throwsError: .unsupported ) + parseTest(#"(?x)[ a - b ]"#, concat( + changeMatchingOptions(matchingOptions(adding: .extended)), + charClass(range_m("a", "b")) + )) + + parseTest(#"(?x)[a - b]"#, concat( + changeMatchingOptions(matchingOptions(adding: .extended)), + charClass(range_m("a", "b")) + )) + // MARK: Operators parseTest( #"[a[bc]de&&[^bc]\d]+"#, - oneOrMore(of: charClass( - .setOperation( - ["a", charClass("b", "c"), "d", "e"], - .init(faking: .intersection), - [charClass("b", "c", inverted: true), atom_m(.escaped(.decimalDigit))] - )))) + oneOrMore(of: charClass(setOp( + "a", charClass("b", "c"), "d", "e", + op: .intersection, + charClass("b", "c", inverted: true), atom_m(.escaped(.decimalDigit)) + ))) + ) parseTest( - "[a&&b]", - charClass( - .setOperation(["a"], .init(faking: .intersection), ["b"]))) + "[a&&b]", charClass(setOp("a", op: .intersection, "b")) + ) parseTest( "[abc--def]", - charClass(.setOperation(["a", "b", "c"], .init(faking: .subtraction), ["d", "e", "f"]))) + 
charClass(setOp("a", "b", "c", op: .subtraction, "d", "e", "f")) + ) // We left-associate for chained operators. parseTest( "[ab&&b~~cd]", - charClass( - .setOperation( - [.setOperation(["a", "b"], .init(faking: .intersection), ["b"])], - .init(faking: .symmetricDifference), - ["c", "d"]))) + charClass(setOp( + setOp("a", "b", op: .intersection, "b"), + op: .symmetricDifference, + "c", "d" + )) + ) // Operators are only valid in custom character classes. parseTest( @@ -723,11 +735,11 @@ extension RegexTests { parseTest( "[ && ]", - charClass(.setOperation([" "], .init(faking: .intersection), [" ", " "])) + charClass(setOp(" ", op: .intersection, " ", " ")) ) parseTest("(?x)[ a && b ]", concat( changeMatchingOptions(matchingOptions(adding: .extended)), - charClass(.setOperation(["a"], .init(faking: .intersection), ["b"])) + charClass(setOp("a", op: .intersection, "b")) )) // MARK: Quotes @@ -832,6 +844,10 @@ extension RegexTests { #"a{1,1}"#, quantRange(1...1, of: "a")) + parseTest("x{3, 5}", quantRange(3 ... 5, of: "x")) + parseTest("x{ 3 , 5 }", quantRange(3 ... 5, of: "x")) + parseTest("x{3 }", exactly(3, of: "x")) + // Make sure ranges get treated as literal if invalid. parseTest("{", "{") parseTest("{,", concat("{", ",")) @@ -851,11 +867,6 @@ extension RegexTests { parseTest("x{+", concat("x", oneOrMore(of: "{"))) parseTest("x{6,+", concat("x", "{", "6", oneOrMore(of: ","))) - // TODO: We should emit a diagnostic for this. - parseTest("x{3, 5}", concat("x", "{", "3", ",", " ", "5", "}")) - parseTest("{3, 5}", concat("{", "3", ",", " ", "5", "}")) - parseTest("{3 }", concat("{", "3", " ", "}")) - // MARK: Groups // Named captures @@ -1648,6 +1659,9 @@ extension RegexTests { parseTest("[(?#abc)]", charClass("(", "?", "#", "a", "b", "c", ")")) parseTest("# abc", concat("#", " ", "a", "b", "c")) + parseTest("(?#)", empty()) + parseTest("/**/", empty(), syntax: .experimental) + // MARK: Matching option changing parseTest( @@ -1675,12 +1689,8 @@ extension RegexTests { ) ) - // End of line comments aren't applicable in custom char classes. - // TODO: ICU supports this. - parseTest("(?x)[ # abc]", concat( - changeMatchingOptions(matchingOptions(adding: .extended)), - charClass("#", "a", "b", "c") - )) + parseTest("[ # abc]", charClass(" ", "#", " ", "a", "b", "c")) + parseTest("[#]", charClass("#")) parseTest("(?x)a b c[d e f]", concat( changeMatchingOptions(matchingOptions(adding: .extended)), @@ -1768,10 +1778,10 @@ extension RegexTests { // PCRE states that whitespace won't be ignored within a range. // http://pcre.org/current/doc/html/pcre2api.html#SEC20 - // TODO: We ought to warn on this, and produce a range anyway. + // We however do ignore it. parseTest("(?x)a{1, 3}", concat( changeMatchingOptions(matchingOptions(adding: .extended)), - "a", "{", "1", ",", "3", "}" + quantRange(1 ... 3, of: "a") )) // Test that we cover the list of whitespace characters covered by PCRE. @@ -2113,6 +2123,26 @@ extension RegexTests { /# """#, scalarSeq("\u{AB}", "\u{B}", "\u{C}")) + parseWithDelimitersTest(#""" + #/ + [ + a # interesting + b-c #a + d] + /# + """#, charClass("a", range_m("b", "c"), "d")) + + parseWithDelimitersTest(#""" + #/ + [ + a # interesting + - #a + b + ] + /# + """#, charClass(range_m("a", "b"))) + + // MARK: Delimiter skipping: Make sure we can skip over the ending delimiter // if it's clear that it's part of the regex syntax. 
@@ -2210,6 +2240,8 @@ extension RegexTests { parseNotEqualTest(#"[abc]"#, #"[a b c]"#) + parseNotEqualTest("[abc]", "[^abc]") + parseNotEqualTest(#"\1"#, #"\10"#) parseNotEqualTest("(?^:)", ("(?-:)")) @@ -2540,6 +2572,12 @@ extension RegexTests { diagnosticTest(#"[c-b]"#, .invalidCharacterRange(from: "c", to: "b")) diagnosticTest(#"[\u{66}-\u{65}]"#, .invalidCharacterRange(from: "\u{66}", to: "\u{65}")) + diagnosticTest("(?x)[(?#)]", .expected("]")) + diagnosticTest("(?x)[(?#abc)]", .expected("]")) + + diagnosticTest("(?x)[#]", .expectedCustomCharacterClassMembers) + diagnosticTest("(?x)[ # abc]", .expectedCustomCharacterClassMembers) + // MARK: Bad escapes diagnosticTest("\\", .expectedEscape) @@ -2690,6 +2728,9 @@ extension RegexTests { diagnosticTest("{1,3}", .quantifierRequiresOperand("{1,3}")) diagnosticTest("a{3,2}", .invalidQuantifierRange(3, 2)) + diagnosticTest("{3, 5}", .quantifierRequiresOperand("{3, 5}")) + diagnosticTest("{3 }", .quantifierRequiresOperand("{3 }")) + // These are not quantifiable. diagnosticTest(#"\b?"#, .notQuantifiable) diagnosticTest(#"\B*"#, .notQuantifiable)