diff --git a/.gitignore b/.gitignore index a7e7e4d09..ff85b9fa3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ .DS_Store +# The current toolchain is dumping files in the package root, rude +*.emit-module.* + # Xcode # # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore diff --git a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift index 911312121..5cc920063 100644 --- a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift +++ b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift @@ -32,8 +32,8 @@ extension Source { static private func classifyGeneralCategory( _ str: String ) -> Unicode.ExtendedGeneralCategory? { - // This uses the aliases defined in - // https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt. + // This uses the aliases defined in https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt. + // Additionally, uses the `L& = Lc` alias defined by PCRE. withNormalizedForms(str) { str in switch str { case "c", "other": return .other @@ -43,7 +43,7 @@ extension Source { case "co", "privateuse": return .privateUse case "cs", "surrogate": return .surrogate case "l", "letter": return .letter - case "lc", "casedletter": return .casedLetter + case "lc", "l&", "casedletter": return .casedLetter case "ll", "lowercaseletter": return .lowercaseLetter case "lm", "modifierletter": return .modifierLetter case "lo", "otherletter": return .otherLetter diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 356b7cc4b..a44c2c876 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -691,8 +691,9 @@ extension Unicode.ExtendedGeneralCategory { ]) case .casedLetter: - throw Unsupported( - "TODO: cased letter? not the property?") + return consumeScalarGCs([ + .uppercaseLetter, .lowercaseLetter, .titlecaseLetter + ]) case .control: return consumeScalarGC(.control) diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 345e80e22..2c6b858cc 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -693,6 +693,14 @@ extension RegexTests { firstMatchTest(#"\p{gc=L}"#, input: "123abcXYZ", match: "a") firstMatchTest(#"\p{Lu}"#, input: "123abcXYZ", match: "X") + // U+0374 GREEK NUMERAL SIGN (Lm) + // U+00AA FEMININE ORDINAL INDICATOR (Lo) + firstMatchTest(#"\p{L}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "\u{0374}") + firstMatchTest(#"\p{Lc}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "a") + firstMatchTest(#"\p{Lc}"#, input: "\u{0374}\u{00AA}123XYZ", match: "X") + firstMatchTest(#"\p{L&}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "a") + firstMatchTest(#"\p{L&}"#, input: "\u{0374}\u{00AA}123XYZ", match: "X") + firstMatchTest( #"\P{Cc}"#, input: "\n\n\nXYZ", match: "X") firstMatchTest( diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index aeefe6477..f0013b158 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -1156,6 +1156,9 @@ extension RegexTests { #"\p{C}+"#, oneOrMore(of: prop(.generalCategory(.other)))) + // L& defined by PCRE. + parseTest(#"\p{L&}"#, prop(.generalCategory(.casedLetter))) + // UAX44-LM3 means all of the below are equivalent. let lowercaseLetter = prop(.generalCategory(.lowercaseLetter)) parseTest(#"\p{ll}"#, lowercaseLetter)