From 0b37083b144fc97287a48dbfbffd6f64e38267a1 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 5 Apr 2023 15:26:33 -0500 Subject: [PATCH 1/2] Fix range-based quantification fast path The fast path for quantification incorrectly discards the last save position when the quantification used up all possible trips, which is only possible with range-based quantifications (e.g. `{0,3}`). This bug shows up when a range-based quantifier matches the maximum - 1 repetitions of the preceding pattern. For example, the regex `/a{0,2}a/` should succeed as a full match any of the strings "aa", "aaa", or "aaaa". However, the pattern fails to match "aaa", since the save point allowing a single "a" to match the first `a{0,2}` part of the regex is discarded. This change only discards the last save position when advancing the quantifier fails due to a failure to match, not maxing out the number of trips. --- .../_StringProcessing/Engine/MEQuantify.swift | 17 ++++++++------ Tests/RegexTests/MatchTests.swift | 22 +++++++++++++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index fa68b8b76..0e2f3923a 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -40,7 +40,14 @@ extension Processor { } } let next = _doQuantifyMatch(payload) - guard let idx = next else { break } + guard let idx = next else { + if !savePoint.rangeIsEmpty { + // The last save point has saved the current, non-matching position, + // so it's unneeded. + savePoint.shrinkRange(input) + } + break + } currentPosition = idx trips += 1 } @@ -50,12 +57,8 @@ extension Processor { return false } - if payload.quantKind == .eager && !savePoint.rangeIsEmpty { - // The last save point has saved the current position, so it's unneeded - savePoint.shrinkRange(input) - if !savePoint.rangeIsEmpty { - savePoints.append(savePoint) - } + if !savePoint.rangeIsEmpty { + savePoints.append(savePoint) } return true } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index a1bf0e76f..b2c24ee0a 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2574,4 +2574,26 @@ extension RegexTests { func testFuzzerArtifacts() throws { expectCompletion(regex: #"(b?)\1*"#, in: "a") } + + func testIssue640() throws { + // Original report from https://github.com/apple/swift-experimental-string-processing/issues/640 + let r = try Regex("[1-9][0-9]{0,2}(?:,?[0-9]{3})*") + XCTAssertNotNil("36,769".wholeMatch(of: r)) + XCTAssertNotNil("36769".wholeMatch(of: r)) + + for max in 1...8 { + let pattern = "a{0,\(max)}a" + let regex = try Regex(pattern) + for length in 1...(max + 1) { + let str = String(repeating: "a", count: length) + if str.wholeMatch(of: regex) == nil { + XCTFail("Didn't match '\(pattern)' in '\(str)' (\(max),\(length)).") + } + } + + let possessiveRegex = try Regex("a{0,\(max)}+a") + let str = String(repeating: "a", count: max + 1) + XCTAssertNotNil(str.wholeMatch(of: possessiveRegex)) + } + } } From 56ab01babadecf8f973474c3b059cc3f2c138374 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 6 Apr 2023 22:46:17 -0500 Subject: [PATCH 2/2] Expand test case --- Tests/RegexTests/MatchTests.swift | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index b2c24ee0a..e8e41a114 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2577,17 +2577,26 @@ extension RegexTests { func testIssue640() throws { // Original report from https://github.com/apple/swift-experimental-string-processing/issues/640 - let r = try Regex("[1-9][0-9]{0,2}(?:,?[0-9]{3})*") - XCTAssertNotNil("36,769".wholeMatch(of: r)) - XCTAssertNotNil("36769".wholeMatch(of: r)) - + let original = try Regex("[1-9][0-9]{0,2}(?:,?[0-9]{3})*") + XCTAssertNotNil("36,769".wholeMatch(of: original)) + XCTAssertNotNil("36769".wholeMatch(of: original)) + + // Simplified case + let simplified = try Regex("a{0,2}a") + XCTAssertNotNil("aaa".wholeMatch(of: simplified)) + for max in 1...8 { - let pattern = "a{0,\(max)}a" - let regex = try Regex(pattern) + let patternEager = "a{0,\(max)}a" + let regexEager = try Regex(patternEager) + let patternReluctant = "a{0,\(max)}?a" + let regexReluctant = try Regex(patternReluctant) for length in 1...(max + 1) { let str = String(repeating: "a", count: length) - if str.wholeMatch(of: regex) == nil { - XCTFail("Didn't match '\(pattern)' in '\(str)' (\(max),\(length)).") + if str.wholeMatch(of: regexEager) == nil { + XCTFail("Didn't match '\(patternEager)' in '\(str)' (\(max),\(length)).") + } + if str.wholeMatch(of: regexReluctant) == nil { + XCTFail("Didn't match '\(patternReluctant)' in '\(str)' (\(max),\(length)).") } }