From 7c5028d04f04203fe4145a750b752acfe1b35c56 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 9 May 2022 18:09:34 -0500 Subject: [PATCH 1/3] Catch more unquantifiable elements This adds start/end anchors ^ and $, groups that form zero-width assertions, and option-changing groups. --- Sources/_RegexParser/Regex/AST/AST.swift | 4 +++- Sources/_RegexParser/Regex/AST/Atom.swift | 2 ++ Sources/_RegexParser/Regex/AST/Group.swift | 15 +++++++++++++++ Tests/RegexTests/ParseTests.swift | 4 ++++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/Sources/_RegexParser/Regex/AST/AST.swift b/Sources/_RegexParser/Regex/AST/AST.swift index a7dcd2015..be1548b72 100644 --- a/Sources/_RegexParser/Regex/AST/AST.swift +++ b/Sources/_RegexParser/Regex/AST/AST.swift @@ -125,7 +125,9 @@ extension AST.Node { switch self { case .atom(let a): return a.isQuantifiable - case .group, .conditional, .customCharacterClass, .absentFunction: + case .group(let g): + return g.isQuantifiable + case .conditional, .customCharacterClass, .absentFunction: return true case .alternation, .concatenation, .quantification, .quote, .trivia, .empty: diff --git a/Sources/_RegexParser/Regex/AST/Atom.swift b/Sources/_RegexParser/Regex/AST/Atom.swift index 9b0f1cb2e..8e4e76fa1 100644 --- a/Sources/_RegexParser/Regex/AST/Atom.swift +++ b/Sources/_RegexParser/Regex/AST/Atom.swift @@ -768,6 +768,8 @@ extension AST.Atom { // TODO: Are callouts quantifiable? case .escaped(let esc): return esc.isQuantifiable + case .startOfLine, .endOfLine: + return false default: return true } diff --git a/Sources/_RegexParser/Regex/AST/Group.swift b/Sources/_RegexParser/Regex/AST/Group.swift index 8ecaadeda..cc87614fc 100644 --- a/Sources/_RegexParser/Regex/AST/Group.swift +++ b/Sources/_RegexParser/Regex/AST/Group.swift @@ -136,3 +136,18 @@ extension AST.Group { } } } + +extension AST.Group { + var isQuantifiable: Bool { + switch kind.value { + case .capture, .namedCapture, .balancedCapture, .nonCapture, + .nonCaptureReset, .atomicNonCapturing, .scriptRun, .atomicScriptRun: + return true + + case .lookahead, .negativeLookahead, .nonAtomicLookahead, + .lookbehind, .negativeLookbehind, .nonAtomicLookbehind, + .changeMatchingOptions: + return false + } + } +} diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 9dfcff99e..f5e98b1fc 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -2569,6 +2569,10 @@ extension RegexTests { diagnosticTest(#"\Z??"#, .notQuantifiable) diagnosticTest(#"\G*?"#, .notQuantifiable) diagnosticTest(#"\z+?"#, .notQuantifiable) + diagnosticTest(#"^*"#, .notQuantifiable) + diagnosticTest(#"$?"#, .notQuantifiable) + diagnosticTest(#"(?=a)+"#, .notQuantifiable) + diagnosticTest(#"(?i)*"#, .notQuantifiable) diagnosticTest(#"\K{1}"#, .unsupported(#"'\K'"#)) diagnosticTest(#"\y{2,5}"#, .notQuantifiable) diagnosticTest(#"\Y{3,}"#, .notQuantifiable) From fa6ed79eaef494050fe2f751f5f87422064792d9 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 9 May 2022 18:32:18 -0500 Subject: [PATCH 2/3] Remove test that includes an unquantifiable group --- Tests/RegexTests/ParseTests.swift | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index f5e98b1fc..6f6502085 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -1756,10 +1756,6 @@ extension RegexTests { charClass("a", "b") )) - parseTest(#"(?i:)?"#, zeroOrOne(of: changeMatchingOptions( - matchingOptions(adding: .caseInsensitive), empty() - ))) - // Test multi-line comment handling. parseTest( """ @@ -2573,6 +2569,7 @@ extension RegexTests { diagnosticTest(#"$?"#, .notQuantifiable) diagnosticTest(#"(?=a)+"#, .notQuantifiable) diagnosticTest(#"(?i)*"#, .notQuantifiable) + diagnosticTest(#"(?i:)?"#, .notQuantifiable) diagnosticTest(#"\K{1}"#, .unsupported(#"'\K'"#)) diagnosticTest(#"\y{2,5}"#, .notQuantifiable) diagnosticTest(#"\Y{3,}"#, .notQuantifiable) From 784a22571948823cba6420fc41cfebf1dd44e081 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Tue, 10 May 2022 11:46:07 -0500 Subject: [PATCH 3/3] Option-changing grps with content are quantifiable --- Sources/_RegexParser/Regex/AST/Group.swift | 6 +++--- Tests/RegexTests/ParseTests.swift | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Sources/_RegexParser/Regex/AST/Group.swift b/Sources/_RegexParser/Regex/AST/Group.swift index cc87614fc..6fd46abe7 100644 --- a/Sources/_RegexParser/Regex/AST/Group.swift +++ b/Sources/_RegexParser/Regex/AST/Group.swift @@ -141,12 +141,12 @@ extension AST.Group { var isQuantifiable: Bool { switch kind.value { case .capture, .namedCapture, .balancedCapture, .nonCapture, - .nonCaptureReset, .atomicNonCapturing, .scriptRun, .atomicScriptRun: + .nonCaptureReset, .atomicNonCapturing, .scriptRun, .atomicScriptRun, + .changeMatchingOptions: return true case .lookahead, .negativeLookahead, .nonAtomicLookahead, - .lookbehind, .negativeLookbehind, .nonAtomicLookbehind, - .changeMatchingOptions: + .lookbehind, .negativeLookbehind, .nonAtomicLookbehind: return false } } diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 6f6502085..f5e98b1fc 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -1756,6 +1756,10 @@ extension RegexTests { charClass("a", "b") )) + parseTest(#"(?i:)?"#, zeroOrOne(of: changeMatchingOptions( + matchingOptions(adding: .caseInsensitive), empty() + ))) + // Test multi-line comment handling. parseTest( """ @@ -2569,7 +2573,6 @@ extension RegexTests { diagnosticTest(#"$?"#, .notQuantifiable) diagnosticTest(#"(?=a)+"#, .notQuantifiable) diagnosticTest(#"(?i)*"#, .notQuantifiable) - diagnosticTest(#"(?i:)?"#, .notQuantifiable) diagnosticTest(#"\K{1}"#, .unsupported(#"'\K'"#)) diagnosticTest(#"\y{2,5}"#, .notQuantifiable) diagnosticTest(#"\Y{3,}"#, .notQuantifiable)