From 6b95d837e987d1d2a0a58b1b3217a8e35e1634af Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Wed, 6 Jul 2022 10:40:11 +0100 Subject: [PATCH 1/3] Mark '\O' unsupported We have decided not to support this for now. --- Sources/_RegexParser/Regex/Parse/Sema.swift | 4 ++-- Tests/RegexTests/MatchTests.swift | 2 +- Tests/RegexTests/ParseTests.swift | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Sources/_RegexParser/Regex/Parse/Sema.swift b/Sources/_RegexParser/Regex/Parse/Sema.swift index 1b9da3e50..0aeee282d 100644 --- a/Sources/_RegexParser/Regex/Parse/Sema.swift +++ b/Sources/_RegexParser/Regex/Parse/Sema.swift @@ -220,14 +220,14 @@ extension RegexValidator { _ esc: AST.Atom.EscapedBuiltin, at loc: SourceLocation ) { switch esc { - case .resetStartOfMatch, .singleDataUnit, + case .resetStartOfMatch, .singleDataUnit, .trueAnychar, // '\N' needs to be emitted using 'emitAny'. .notNewline: error(.unsupported("'\\\(esc.character)'"), at: loc) // Character classes. case .decimalDigit, .notDecimalDigit, .whitespace, .notWhitespace, - .wordCharacter, .notWordCharacter, .graphemeCluster, .trueAnychar, + .wordCharacter, .notWordCharacter, .graphemeCluster, .horizontalWhitespace, .notHorizontalWhitespace, .verticalTab, .notVerticalTab: break diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 51da6d010..d5a8eefc1 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -1786,7 +1786,7 @@ extension RegexTests { match: eDecomposed, xfail: true ) - firstMatchTest(#"\O"#, input: eComposed, match: eComposed) + firstMatchTest(#"\O"#, input: eComposed, match: eComposed, xfail: true) firstMatchTest(#"\O"#, input: eDecomposed, match: nil, xfail: true) diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 51654c057..9803bb2e1 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -806,6 +806,8 @@ extension RegexTests { 
parseTest(#"\M-\C--"#, atom(.keyboardMetaControl("-")), unsupported: true) parseTest(#"\M-a"#, atom(.keyboardMeta("a")), unsupported: true) + parseTest(#"\O"#, escaped(.trueAnychar), unsupported: true) + // MARK: Comments parseTest( From e402860e207abe97011f6f3a083ba402fe812297 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Wed, 6 Jul 2022 10:40:12 +0100 Subject: [PATCH 2/3] Remove CharacterClass.anyUnicodeScalar We have decided not to support this for now. --- Sources/RegexBuilder/CharacterClass.swift | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Sources/RegexBuilder/CharacterClass.swift b/Sources/RegexBuilder/CharacterClass.swift index 0c34b0de2..7704f2ad0 100644 --- a/Sources/RegexBuilder/CharacterClass.swift +++ b/Sources/RegexBuilder/CharacterClass.swift @@ -52,10 +52,6 @@ extension RegexComponent where Self == CharacterClass { .init(unconverted: .anyGrapheme) } - public static var anyUnicodeScalar: CharacterClass { - .init(unconverted: .anyUnicodeScalar) - } - public static var whitespace: CharacterClass { .init(unconverted: .whitespace) } From 52fc547501a14c625c5ac90516a82f4e72053315 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Wed, 6 Jul 2022 10:40:12 +0100 Subject: [PATCH 3/3] Rename `anyGrapheme` -> `anyGraphemeCluster` Update to match the proposal. 
--- Sources/RegexBuilder/CharacterClass.swift | 2 +- Tests/RegexBuilderTests/AlgorithmsTests.swift | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/RegexBuilder/CharacterClass.swift b/Sources/RegexBuilder/CharacterClass.swift index 7704f2ad0..a6d18b2cf 100644 --- a/Sources/RegexBuilder/CharacterClass.swift +++ b/Sources/RegexBuilder/CharacterClass.swift @@ -48,7 +48,7 @@ extension RegexComponent where Self == CharacterClass { .init(DSLTree.CustomCharacterClass(members: [.atom(.any)])) } - public static var anyGrapheme: CharacterClass { + public static var anyGraphemeCluster: CharacterClass { .init(unconverted: .anyGrapheme) } diff --git a/Tests/RegexBuilderTests/AlgorithmsTests.swift b/Tests/RegexBuilderTests/AlgorithmsTests.swift index a7d41b3ed..52d758bdb 100644 --- a/Tests/RegexBuilderTests/AlgorithmsTests.swift +++ b/Tests/RegexBuilderTests/AlgorithmsTests.swift @@ -260,7 +260,7 @@ class AlgorithmsResultBuilderTests: XCTestCase { func testStartsAndContains() throws { let fam = "👨‍👩‍👧‍👦👨‍👨‍👧‍👧 we Ⓡ family" let startsWithGrapheme = fam.starts { - OneOrMore(.anyGrapheme) + OneOrMore(.anyGraphemeCluster) OneOrMore(.whitespace) } XCTAssertEqual(startsWithGrapheme, true) @@ -272,7 +272,7 @@ class AlgorithmsResultBuilderTests: XCTestCase { let content = { Regex { - OneOrMore(.anyGrapheme) + OneOrMore(.anyGraphemeCluster) OneOrMore(.whitespace) } } @@ -321,7 +321,7 @@ class AlgorithmsResultBuilderTests: XCTestCase { var mutable = "👨‍👩‍👧‍👦 we Ⓡ family" mutable.trimPrefix { - .anyGrapheme + .anyGraphemeCluster ZeroOrMore(.whitespace) } XCTAssertEqual(mutable, "we Ⓡ family")