diff --git a/Package.swift b/Package.swift
index f8162e762..8303fc5cb 100644
--- a/Package.swift
+++ b/Package.swift
@@ -67,7 +67,7 @@ let package = Package(
       name: "RegexTests",
       dependencies: ["_StringProcessing"],
       swiftSettings: [
-        .unsafeFlags(["-Xfrontend", "-disable-availability-checking"])
+        .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
       ]),
     .testTarget(
       name: "RegexBuilderTests",
diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
index 21fcfa703..2131d1eb5 100644
--- a/Sources/_StringProcessing/ByteCodeGen.swift
+++ b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -168,7 +168,15 @@ extension Compiler.ByteCodeGen {
   }
 
   mutating func emitCharacter(_ c: Character) throws {
-    // FIXME: Does semantic level matter?
+    // Unicode scalar matches the specific scalars that comprise a character
+    if options.semanticLevel == .unicodeScalar {
+      print("emitting '\(c)' as a sequence of \(c.unicodeScalars.count) scalars")
+      for scalar in c.unicodeScalars {
+        try emitScalar(scalar)
+      }
+      return
+    }
+
     if options.isCaseInsensitive && c.isCased {
       // TODO: buildCaseInsensitiveMatch(c) or buildMatch(c, caseInsensitive: true)
       builder.buildConsume { input, bounds in
@@ -625,22 +633,44 @@ extension Compiler.ByteCodeGen {
       try emitAtom(a)
 
     case let .quotedLiteral(s):
-      // TODO: Should this incorporate options?
-      if options.isCaseInsensitive {
-        // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
-        builder.buildConsume { input, bounds in
-          var iterator = s.makeIterator()
+      if options.semanticLevel == .graphemeCluster {
+        if options.isCaseInsensitive {
+          // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
+          builder.buildConsume { input, bounds in
+            var iterator = s.makeIterator()
+            var currentIndex = bounds.lowerBound
+            while let ch = iterator.next() {
+              guard currentIndex < bounds.upperBound,
+                    ch.lowercased() == input[currentIndex].lowercased()
+              else { return nil }
+              input.formIndex(after: &currentIndex)
+            }
+            return currentIndex
+          }
+        } else {
+          builder.buildMatchSequence(s)
+        }
+      } else {
+        builder.buildConsume {
+          [caseInsensitive = options.isCaseInsensitive] input, bounds in
+          // TODO: Case folding
+          var iterator = s.unicodeScalars.makeIterator()
           var currentIndex = bounds.lowerBound
-          while let ch = iterator.next() {
-            guard currentIndex < bounds.upperBound,
-                  ch.lowercased() == input[currentIndex].lowercased()
-            else { return nil }
-            input.formIndex(after: &currentIndex)
+          while let scalar = iterator.next() {
+            guard currentIndex < bounds.upperBound else { return nil }
+            if caseInsensitive {
+              if scalar.properties.lowercaseMapping != input.unicodeScalars[currentIndex].properties.lowercaseMapping {
+                return nil
+              }
+            } else {
+              if scalar != input.unicodeScalars[currentIndex] {
+                return nil
+              }
+            }
+            input.unicodeScalars.formIndex(after: &currentIndex)
          }
           return currentIndex
         }
-      } else {
-        builder.buildMatchSequence(s)
       }
 
     case let .regexLiteral(l):
diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift
index a44c2c876..d27b89314 100644
--- a/Sources/_StringProcessing/ConsumerInterface.swift
+++ b/Sources/_StringProcessing/ConsumerInterface.swift
@@ -111,6 +111,38 @@ extension DSLTree.Atom {
   }
 }
 
+extension String {
+  /// Compares this string to `other` using the loose matching rule UAX44-LM2,
+  /// which ignores case, whitespace, underscores, and nearly all medial
+  /// hyphens.
+ /// + /// FIXME: Only ignore medial hyphens + /// FIXME: Special case for U+1180 HANGUL JUNGSEONG O-E + /// See https://www.unicode.org/reports/tr44/#Matching_Rules + fileprivate func isEqualByUAX44LM2(to other: String) -> Bool { + var index = startIndex + var otherIndex = other.startIndex + + while index < endIndex && otherIndex < other.endIndex { + if self[index].isWhitespace || self[index] == "-" || self[index] == "_" { + formIndex(after: &index) + continue + } + if other[otherIndex].isWhitespace || other[otherIndex] == "-" || other[otherIndex] == "_" { + other.formIndex(after: &otherIndex) + continue + } + + if self[index] != other[otherIndex] && self[index].lowercased() != other[otherIndex].lowercased() { + return false + } + + formIndex(after: &index) + other.formIndex(after: &otherIndex) + } + return index == endIndex && otherIndex == other.endIndex + } +} // TODO: This is basically an AST interpreter, which would // be good or interesting to build regardless, and serves @@ -131,6 +163,13 @@ extension AST.Atom { } } + var singleScalar: UnicodeScalar? { + switch kind { + case .scalar(let s): return s + default: return nil + } + } + func generateConsumer( _ opts: MatchingOptions ) throws -> MEProgram.ConsumeFunction? { @@ -167,10 +206,12 @@ extension AST.Atom { return try p.generateConsumer(opts) case let .namedCharacter(name): - return consumeScalarProp { - // TODO: alias? casing? - $0.name == name || $0.nameAlias == name - } + return consumeScalar(propertyScalarPredicate { + // FIXME: name aliases not covered by $0.nameAlias are missed + // e.g. U+FEFF is also 'FORM FEED', 'BYTE ORDER MARK', and 'BOM' + $0.name?.isEqualByUAX44LM2(to: name) == true + || $0.nameAlias?.isEqualByUAX44LM2(to: name) == true + }) case .any: assertionFailure( @@ -312,8 +353,9 @@ extension DSLTree.CustomCharacterClass { } } if isInverted { - // FIXME: semantic level - return input.index(after: bounds.lowerBound) + return opts.semanticLevel == .graphemeCluster + ? 
input.index(after: bounds.lowerBound) + : input.unicodeScalars.index(after: bounds.lowerBound) } return nil } @@ -321,38 +363,26 @@ extension DSLTree.CustomCharacterClass { } // NOTE: Conveniences, though not most performant -private func consumeScalarScript( - _ s: Unicode.Script -) -> MEProgram.ConsumeFunction { - consumeScalar { - Unicode.Script($0) == s - } +typealias ScalarPredicate = (UnicodeScalar) -> Bool + +private func scriptScalarPredicate(_ s: Unicode.Script) -> ScalarPredicate { + { Unicode.Script($0) == s } } -private func consumeScalarScriptExtension( - _ s: Unicode.Script -) -> MEProgram.ConsumeFunction { - consumeScalar { - let extensions = Unicode.Script.extensions(for: $0) - return extensions.contains(s) - } +private func scriptExtensionScalarPredicate(_ s: Unicode.Script) -> ScalarPredicate { + { Unicode.Script.extensions(for: $0).contains(s) } } -private func consumeScalarGC( - _ gc: Unicode.GeneralCategory -) -> MEProgram.ConsumeFunction { - consumeScalar { gc == $0.properties.generalCategory } +private func categoryScalarPredicate(_ gc: Unicode.GeneralCategory) -> ScalarPredicate { + { gc == $0.properties.generalCategory } } -private func consumeScalarGCs( - _ gcs: [Unicode.GeneralCategory] -) -> MEProgram.ConsumeFunction { - consumeScalar { gcs.contains($0.properties.generalCategory) } +private func categoriesScalarPredicate(_ gcs: [Unicode.GeneralCategory]) -> ScalarPredicate { + { gcs.contains($0.properties.generalCategory) } } -private func consumeScalarProp( - _ p: @escaping (Unicode.Scalar.Properties) -> Bool -) -> MEProgram.ConsumeFunction { - consumeScalar { p($0.properties) } +private func propertyScalarPredicate(_ p: @escaping (Unicode.Scalar.Properties) -> Bool) -> ScalarPredicate { + { p($0.properties) } } + func consumeScalar( - _ p: @escaping (Unicode.Scalar) -> Bool + _ p: @escaping ScalarPredicate ) -> MEProgram.ConsumeFunction { { input, bounds in // TODO: bounds check? @@ -364,6 +394,37 @@ func consumeScalar( return nil } } +func consumeCharacterWithLeadingScalar( + _ p: @escaping ScalarPredicate +) -> MEProgram.ConsumeFunction { + { input, bounds in + let curIdx = bounds.lowerBound + if p(input[curIdx].unicodeScalars.first!) { + return input.index(after: curIdx) + } + return nil + } +} +func consumeCharacterWithSingleScalar( + _ p: @escaping ScalarPredicate +) -> MEProgram.ConsumeFunction { + { input, bounds in + let curIdx = bounds.lowerBound + + if input[curIdx].hasExactlyOneScalar && p(input[curIdx].unicodeScalars.first!) { + return input.index(after: curIdx) + } + return nil + } +} + +func consumeFunction( + for opts: MatchingOptions +) -> (@escaping ScalarPredicate) -> MEProgram.ConsumeFunction { + opts.semanticLevel == .graphemeCluster + ? consumeCharacterWithLeadingScalar + : consumeScalar +} extension AST.Atom.CharacterProperty { func generateConsumer( @@ -375,16 +436,15 @@ extension AST.Atom.CharacterProperty { ) -> MEProgram.ConsumeFunction { return { input, bounds in if p(input, bounds) != nil { return nil } - // TODO: semantic level + // TODO: bounds check - return input.unicodeScalars.index( - after: bounds.lowerBound) + return opts.semanticLevel == .graphemeCluster + ? input.index(after: bounds.lowerBound) + : input.unicodeScalars.index(after: bounds.lowerBound) } } - // FIXME: Below is largely scalar based, for convenience, - // but we want a comprehensive treatment to semantic mode - // switching. 
+ let consume = consumeFunction(for: opts) let preInversion: MEProgram.ConsumeFunction = try { switch kind { @@ -395,11 +455,16 @@ extension AST.Atom.CharacterProperty { return input.index(after: bounds.lowerBound) } case .assigned: - return consumeScalar { + return consume { $0.properties.generalCategory != .unassigned } case .ascii: - return consumeScalar(\.isASCII) + // Note: ASCII must look at the whole character, not just the first + // scalar. That is, "e\u{301}" is not an ASCII character, even though + // the first scalar is. + return opts.semanticLevel == .graphemeCluster + ? consumeCharacterWithSingleScalar(\.isASCII) + : consumeScalar(\.isASCII) case .generalCategory(let p): return try p.generateConsumer(opts) @@ -410,10 +475,10 @@ extension AST.Atom.CharacterProperty { return value ? cons : invert(cons) case .script(let s): - return consumeScalarScript(s) + return consume(scriptScalarPredicate(s)) case .scriptExtension(let s): - return consumeScalarScriptExtension(s) + return consume(scriptExtensionScalarPredicate(s)) case .posix(let p): return p.generateConsumer(opts) @@ -436,49 +501,48 @@ extension Unicode.BinaryProperty { func generateConsumer( _ opts: MatchingOptions ) throws -> MEProgram.ConsumeFunction { + let consume = consumeFunction(for: opts) + switch self { - case .asciiHexDigit: - return consumeScalarProp { + return consume(propertyScalarPredicate { $0.isHexDigit && $0.isASCIIHexDigit - } + }) case .alphabetic: - return consumeScalarProp(\.isAlphabetic) + return consume(propertyScalarPredicate(\.isAlphabetic)) case .bidiControl: break - - - case .bidiMirrored: - return consumeScalarProp(\.isBidiMirrored) + case .bidiMirrored: + return consume(propertyScalarPredicate(\.isBidiMirrored)) case .cased: - return consumeScalarProp(\.isCased) + return consume(propertyScalarPredicate(\.isCased)) case .compositionExclusion: break case .caseIgnorable: - return consumeScalarProp(\.isCaseIgnorable) + return consume(propertyScalarPredicate(\.isCaseIgnorable)) case .changesWhenCasefolded: - return consumeScalarProp(\.changesWhenCaseFolded) + return consume(propertyScalarPredicate(\.changesWhenCaseFolded)) case .changesWhenCasemapped: - return consumeScalarProp(\.changesWhenCaseMapped) + return consume(propertyScalarPredicate(\.changesWhenCaseMapped)) case .changesWhenNFKCCasefolded: - return consumeScalarProp(\.changesWhenNFKCCaseFolded) + return consume(propertyScalarPredicate(\.changesWhenNFKCCaseFolded)) case .changesWhenLowercased: - return consumeScalarProp(\.changesWhenLowercased) + return consume(propertyScalarPredicate(\.changesWhenLowercased)) case .changesWhenTitlecased: - return consumeScalarProp(\.changesWhenTitlecased) + return consume(propertyScalarPredicate(\.changesWhenTitlecased)) case .changesWhenUppercased: - return consumeScalarProp(\.changesWhenUppercased) + return consume(propertyScalarPredicate(\.changesWhenUppercased)) case .dash: - return consumeScalarProp(\.isDash) + return consume(propertyScalarPredicate(\.isDash)) case .deprecated: - return consumeScalarProp(\.isDeprecated) + return consume(propertyScalarPredicate(\.isDeprecated)) case .defaultIgnorableCodePoint: - return consumeScalarProp(\.isDefaultIgnorableCodePoint) + return consume(propertyScalarPredicate(\.isDefaultIgnorableCodePoint)) case .diacratic: // spelling? 
- return consumeScalarProp(\.isDiacritic) + return consume(propertyScalarPredicate(\.isDiacritic)) case .emojiModifierBase: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { - return consumeScalarProp(\.isEmojiModifierBase) + return consume(propertyScalarPredicate(\.isEmojiModifierBase)) } else { throw Unsupported( "isEmojiModifierBase on old OSes") @@ -487,59 +551,59 @@ extension Unicode.BinaryProperty { break case .emojiModifier: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { - return consumeScalarProp(\.isEmojiModifier) + return consume(propertyScalarPredicate(\.isEmojiModifier)) } else { throw Unsupported("isEmojiModifier on old OSes") } case .emoji: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { - return consumeScalarProp(\.isEmoji) + return consume(propertyScalarPredicate(\.isEmoji)) } else { throw Unsupported("isEmoji on old OSes") } case .emojiPresentation: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { - return consumeScalarProp(\.isEmojiPresentation) + return consume(propertyScalarPredicate(\.isEmojiPresentation)) } else { throw Unsupported( "isEmojiPresentation on old OSes") } case .extender: - return consumeScalarProp(\.isExtender) + return consume(propertyScalarPredicate(\.isExtender)) case .extendedPictographic: break // NOTE: Stdlib has this data internally case .fullCompositionExclusion: - return consumeScalarProp(\.isFullCompositionExclusion) + return consume(propertyScalarPredicate(\.isFullCompositionExclusion)) case .graphemeBase: - return consumeScalarProp(\.isGraphemeBase) + return consume(propertyScalarPredicate(\.isGraphemeBase)) case .graphemeExtended: - return consumeScalarProp(\.isGraphemeExtend) + return consume(propertyScalarPredicate(\.isGraphemeExtend)) case .graphemeLink: break case .hexDigit: - return consumeScalarProp(\.isHexDigit) + return consume(propertyScalarPredicate(\.isHexDigit)) case .hyphen: break case .idContinue: - return consumeScalarProp(\.isIDContinue) + return consume(propertyScalarPredicate(\.isIDContinue)) case .ideographic: - return consumeScalarProp(\.isIdeographic) + return consume(propertyScalarPredicate(\.isIdeographic)) case .idStart: - return consumeScalarProp(\.isIDStart) + return consume(propertyScalarPredicate(\.isIDStart)) case .idsBinaryOperator: - return consumeScalarProp(\.isIDSBinaryOperator) + return consume(propertyScalarPredicate(\.isIDSBinaryOperator)) case .idsTrinaryOperator: - return consumeScalarProp(\.isIDSTrinaryOperator) + return consume(propertyScalarPredicate(\.isIDSTrinaryOperator)) case .joinControl: - return consumeScalarProp(\.isJoinControl) + return consume(propertyScalarPredicate(\.isJoinControl)) case .logicalOrderException: - return consumeScalarProp(\.isLogicalOrderException) + return consume(propertyScalarPredicate(\.isLogicalOrderException)) case .lowercase: - return consumeScalarProp(\.isLowercase) + return consume(propertyScalarPredicate(\.isLowercase)) case .math: - return consumeScalarProp(\.isMath) + return consume(propertyScalarPredicate(\.isMath)) case .noncharacterCodePoint: - return consumeScalarProp(\.isNoncharacterCodePoint) + return consume(propertyScalarPredicate(\.isNoncharacterCodePoint)) case .otherAlphabetic: break case .otherDefaultIgnorableCodePoint: @@ -557,37 +621,37 @@ extension Unicode.BinaryProperty { case .otherUppercase: break case .patternSyntax: - return consumeScalarProp(\.isPatternSyntax) + return consume(propertyScalarPredicate(\.isPatternSyntax)) case .patternWhitespace: - return 
consumeScalarProp(\.isPatternWhitespace) + return consume(propertyScalarPredicate(\.isPatternWhitespace)) case .prependedConcatenationMark: break case .quotationMark: - return consumeScalarProp(\.isQuotationMark) + return consume(propertyScalarPredicate(\.isQuotationMark)) case .radical: - return consumeScalarProp(\.isRadical) + return consume(propertyScalarPredicate(\.isRadical)) case .regionalIndicator: - return consumeScalar { s in + return consume { s in (0x1F1E6...0x1F1FF).contains(s.value) } case .softDotted: - return consumeScalarProp(\.isSoftDotted) + return consume(propertyScalarPredicate(\.isSoftDotted)) case .sentenceTerminal: - return consumeScalarProp(\.isSentenceTerminal) + return consume(propertyScalarPredicate(\.isSentenceTerminal)) case .terminalPunctuation: - return consumeScalarProp(\.isTerminalPunctuation) + return consume(propertyScalarPredicate(\.isTerminalPunctuation)) case .unifiedIdiograph: // spelling? - return consumeScalarProp(\.isUnifiedIdeograph) + return consume(propertyScalarPredicate(\.isUnifiedIdeograph)) case .uppercase: - return consumeScalarProp(\.isUppercase) + return consume(propertyScalarPredicate(\.isUppercase)) case .variationSelector: - return consumeScalarProp(\.isVariationSelector) + return consume(propertyScalarPredicate(\.isVariationSelector)) case .whitespace: - return consumeScalarProp(\.isWhitespace) + return consume(propertyScalarPredicate(\.isWhitespace)) case .xidContinue: - return consumeScalarProp(\.isXIDContinue) + return consume(propertyScalarPredicate(\.isXIDContinue)) case .xidStart: - return consumeScalarProp(\.isXIDStart) + return consume(propertyScalarPredicate(\.isXIDStart)) case .expandsOnNFC, .expandsOnNFD, .expandsOnNFKD, .expandsOnNFKC: throw Unsupported("Unicode-deprecated: \(self)") @@ -602,42 +666,44 @@ extension Unicode.POSIXProperty { func generateConsumer( _ opts: MatchingOptions ) -> MEProgram.ConsumeFunction { - // FIXME: semantic levels, modes, etc + let consume = consumeFunction(for: opts) + + // FIXME: modes, etc switch self { case .alnum: - return consumeScalarProp { + return consume(propertyScalarPredicate { $0.isAlphabetic || $0.numericType != nil - } + }) case .blank: - return consumeScalar { s in + return consume { s in s.properties.generalCategory == .spaceSeparator || s == "\t" } case .graph: - return consumeScalarProp { p in + return consume(propertyScalarPredicate { p in !( p.isWhitespace || p.generalCategory == .control || p.generalCategory == .surrogate || p.generalCategory == .unassigned ) - } + }) case .print: - return consumeScalarProp { p in + return consume(propertyScalarPredicate { p in // FIXME: better def p.generalCategory != .control - } + }) case .word: - return consumeScalarProp { p in + return consume(propertyScalarPredicate { p in // FIXME: better def p.isAlphabetic || p.numericType != nil || p.isJoinControl || p.isDash// marks and connectors... 
- } + }) case .xdigit: - return consumeScalarProp(\.isHexDigit) // or number + return consume(propertyScalarPredicate(\.isHexDigit)) // or number } } @@ -648,113 +714,115 @@ extension Unicode.ExtendedGeneralCategory { func generateConsumer( _ opts: MatchingOptions ) throws -> MEProgram.ConsumeFunction { + let consume = consumeFunction(for: opts) + switch self { case .letter: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .uppercaseLetter, .lowercaseLetter, .titlecaseLetter, .modifierLetter, .otherLetter - ]) + ])) case .mark: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .nonspacingMark, .spacingMark, .enclosingMark - ]) + ])) case .number: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .decimalNumber, .letterNumber, .otherNumber - ]) + ])) case .symbol: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .mathSymbol, .currencySymbol, .modifierSymbol, .otherSymbol - ]) + ])) case .punctuation: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .connectorPunctuation, .dashPunctuation, .openPunctuation, .closePunctuation, .initialPunctuation, .finalPunctuation, .otherPunctuation - ]) + ])) case .separator: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .spaceSeparator, .lineSeparator, .paragraphSeparator - ]) + ])) case .other: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .control, .format, .surrogate, .privateUse, .unassigned - ]) + ])) case .casedLetter: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .uppercaseLetter, .lowercaseLetter, .titlecaseLetter - ]) + ])) case .control: - return consumeScalarGC(.control) + return consume(categoryScalarPredicate(.control)) case .format: - return consumeScalarGC(.format) + return consume(categoryScalarPredicate(.format)) case .unassigned: - return consumeScalarGC(.unassigned) + return consume(categoryScalarPredicate(.unassigned)) case .privateUse: - return consumeScalarGC(.privateUse) + return consume(categoryScalarPredicate(.privateUse)) case .surrogate: - return consumeScalarGC(.surrogate) + return consume(categoryScalarPredicate(.surrogate)) case .lowercaseLetter: - return consumeScalarGC(.lowercaseLetter) + return consume(categoryScalarPredicate(.lowercaseLetter)) case .modifierLetter: - return consumeScalarGC(.modifierLetter) + return consume(categoryScalarPredicate(.modifierLetter)) case .otherLetter: - return consumeScalarGC(.otherLetter) + return consume(categoryScalarPredicate(.otherLetter)) case .titlecaseLetter: - return consumeScalarGC(.titlecaseLetter) + return consume(categoryScalarPredicate(.titlecaseLetter)) case .uppercaseLetter: - return consumeScalarGC(.uppercaseLetter) + return consume(categoryScalarPredicate(.uppercaseLetter)) case .spacingMark: - return consumeScalarGC(.spacingMark) + return consume(categoryScalarPredicate(.spacingMark)) case .enclosingMark: - return consumeScalarGC(.enclosingMark) + return consume(categoryScalarPredicate(.enclosingMark)) case .nonspacingMark: - return consumeScalarGC(.nonspacingMark) + return consume(categoryScalarPredicate(.nonspacingMark)) case .decimalNumber: - return consumeScalarGC(.decimalNumber) + return consume(categoryScalarPredicate(.decimalNumber)) case .letterNumber: - return consumeScalarGC(.letterNumber) + return consume(categoryScalarPredicate(.letterNumber)) case .otherNumber: - return consumeScalarGC(.otherNumber) + return consume(categoryScalarPredicate(.otherNumber)) case 
.connectorPunctuation: - return consumeScalarGC(.connectorPunctuation) + return consume(categoryScalarPredicate(.connectorPunctuation)) case .dashPunctuation: - return consumeScalarGC(.dashPunctuation) + return consume(categoryScalarPredicate(.dashPunctuation)) case .closePunctuation: - return consumeScalarGC(.closePunctuation) + return consume(categoryScalarPredicate(.closePunctuation)) case .finalPunctuation: - return consumeScalarGC(.finalPunctuation) + return consume(categoryScalarPredicate(.finalPunctuation)) case .initialPunctuation: - return consumeScalarGC(.initialPunctuation) + return consume(categoryScalarPredicate(.initialPunctuation)) case .otherPunctuation: - return consumeScalarGC(.otherPunctuation) + return consume(categoryScalarPredicate(.otherPunctuation)) case .openPunctuation: - return consumeScalarGC(.openPunctuation) + return consume(categoryScalarPredicate(.openPunctuation)) case .currencySymbol: - return consumeScalarGC(.currencySymbol) + return consume(categoryScalarPredicate(.currencySymbol)) case .modifierSymbol: - return consumeScalarGC(.modifierSymbol) + return consume(categoryScalarPredicate(.modifierSymbol)) case .mathSymbol: - return consumeScalarGC(.mathSymbol) + return consume(categoryScalarPredicate(.mathSymbol)) case .otherSymbol: - return consumeScalarGC(.otherSymbol) + return consume(categoryScalarPredicate(.otherSymbol)) case .lineSeparator: - return consumeScalarGC(.lineSeparator) + return consume(categoryScalarPredicate(.lineSeparator)) case .paragraphSeparator: - return consumeScalarGC(.paragraphSeparator) + return consume(categoryScalarPredicate(.paragraphSeparator)) case .spaceSeparator: - return consumeScalarGC(.spaceSeparator) + return consume(categoryScalarPredicate(.spaceSeparator)) } } } diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index ef98a7b8f..47433dc42 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -65,13 +65,17 @@ extension AST.Node { // TODO: For printing, nice to coalesce // scalars literals too. We likely need a different // approach even before we have a better IR. - guard let char = atom?.singleCharacter else { + if let char = atom?.singleCharacter { + result.append(char) + } else if let scalar = atom?.singleScalar { + result.append(Character(scalar)) + } else { break } - result.append(char) + astChildren.formIndex(after: &idx) } - return result.count <= 1 ? nil : (idx, result) + return result.isEmpty ? nil : (idx, result) } // No need to nest single children concatenations @@ -207,7 +211,7 @@ extension AST.Atom { switch self.kind { case let .char(c): return .char(c) - case let .scalar(s): return .scalar(s) + case let .scalar(s): return .char(Character(s)) case .any: return .any case let .backreference(r): return .backreference(.init(ast: r)) case let .changeMatchingOptions(seq): return .changeMatchingOptions(.init(ast: seq)) diff --git a/Sources/_StringProcessing/Unicode/CharacterProps.swift b/Sources/_StringProcessing/Unicode/CharacterProps.swift index cfa68c425..80f6819a6 100644 --- a/Sources/_StringProcessing/Unicode/CharacterProps.swift +++ b/Sources/_StringProcessing/Unicode/CharacterProps.swift @@ -12,3 +12,9 @@ // TODO +extension Character { + /// Whether this character is made up of exactly one Unicode scalar value. 
+ var hasExactlyOneScalar: Bool { + unicodeScalars.index(after: unicodeScalars.startIndex) == unicodeScalars.endIndex + } +} diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index 4d0c12c1f..fc3fd5741 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -194,11 +194,14 @@ public struct _CharacterClassModel: Hashable { return matched ? next : nil case .unicodeScalar: let c = str.unicodeScalars[i] + var nextIndex = str.unicodeScalars.index(after: i) var matched: Bool switch cc { case .any: matched = true case .anyScalar: matched = true - case .anyGrapheme: fatalError("Not matched in this mode") + case .anyGrapheme: + matched = true + nextIndex = str.index(after: i) case .digit: matched = c.properties.numericType != nil && (c.isASCII || !options.usesASCIIDigits) case .hexDigit: @@ -215,7 +218,7 @@ public struct _CharacterClassModel: Hashable { if isInverted { matched.toggle() } - return matched ? str.unicodeScalars.index(after: i) : nil + return matched ? nextIndex : nil } } } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 2c6b858cc..83b73fe35 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -169,6 +169,8 @@ func firstMatchTest( XCTAssertEqual(found, match, file: file, line: line) } } catch { + // FIXME: This allows non-matches to succeed even when xfail'd + // When xfail == true, this should report failure for match == nil if !xfail && match != nil { XCTFail("\(error)", file: file, line: line) } @@ -182,7 +184,9 @@ func firstMatchTests( syntax: SyntaxOptions = .traditional, enableTracing: Bool = false, dumpAST: Bool = false, - xfail: Bool = false + xfail: Bool = false, + file: StaticString = #filePath, + line: UInt = #line ) { for (input, match) in tests { firstMatchTest( @@ -192,7 +196,9 @@ func firstMatchTests( syntax: syntax, enableTracing: enableTracing, dumpAST: dumpAST, - xfail: xfail) + xfail: xfail, + file: file, + line: line) } } @@ -400,7 +406,8 @@ extension RegexTests { "a++a", ("babc", nil), ("baaabc", nil), - ("bb", nil)) + ("bb", nil), + xfail: true) firstMatchTests( "a+?a", ("babc", nil), @@ -462,15 +469,11 @@ extension RegexTests { "a{2,4}+a", ("babc", nil), ("baabc", nil), - ("baaabc", nil), ("baaaaabc", "aaaaa"), ("baaaaaaaabc", "aaaaa"), ("bb", nil)) firstMatchTests( "a{,4}+a", - ("babc", nil), - ("baabc", nil), - ("baaabc", nil), ("baaaaabc", "aaaaa"), ("baaaaaaaabc", "aaaaa"), ("bb", nil)) @@ -478,11 +481,44 @@ extension RegexTests { "a{2,}+a", ("babc", nil), ("baabc", nil), + ("bb", nil)) + + // XFAIL'd versions of the above + firstMatchTests( + "a{2,4}+a", + ("baaabc", nil), + xfail: true) + firstMatchTests( + "a{,4}+a", + ("babc", nil), + ("baabc", nil), + ("baaabc", nil), + xfail: true) + firstMatchTests( + "a{2,}+a", ("baaabc", nil), ("baaaaabc", nil), ("baaaaaaaabc", nil), - ("bb", nil)) + xfail: true) + // XFAIL'd possessive tests + firstMatchTests( + "a?+a", + ("a", nil), + xfail: true) + firstMatchTests( + "(a|a)?+a", + ("a", nil), + xfail: true) + firstMatchTests( + "(a|a){2,4}+a", + ("a", nil), + ("aa", nil)) + firstMatchTests( + "(a|a){2,4}+a", + ("aaa", nil), + ("aaaa", nil), + xfail: true) firstMatchTests( "(?:a{2,4}?b)+", @@ -946,15 +982,19 @@ extension RegexTests { // TODO: Oniguruma \y and \Y firstMatchTests( - #"\u{65}"#, // Scalar 'e' is present in both: - ("Cafe\u{301}", "e"), // composed and - ("Sol Cafe", "e")) // 
standalone + #"\u{65}"#, // Scalar 'e' is present in both + ("Cafe\u{301}", nil), // but scalar mode requires boundary at end of match + xfail: true) + firstMatchTests( + #"\u{65}"#, // Scalar 'e' is present in both + ("Sol Cafe", "e")) // standalone is okay + firstMatchTests( #"\u{65}\y"#, // Grapheme boundary assertion ("Cafe\u{301}", nil), ("Sol Cafe", "e")) firstMatchTests( - #"\u{65}\Y"#, // Grapheme non-boundary assertion + #"(?u)\u{65}\Y"#, // Grapheme non-boundary assertion ("Cafe\u{301}", "e"), ("Sol Cafe", nil)) } @@ -1361,11 +1401,11 @@ extension RegexTests { // as a character. firstMatchTest(#"\u{65}\u{301}$"#, input: eDecomposed, match: eDecomposed) - // FIXME: Decomposed character in regex literal doesn't match an equivalent character - firstMatchTest(#"\u{65}\u{301}$"#, input: eComposed, match: eComposed, - xfail: true) + firstMatchTest(#"\u{65}\u{301}$"#, input: eComposed, match: eComposed) - firstMatchTest(#"\u{65}"#, input: eDecomposed, match: "e") + // FIXME: Implicit \y at end of match + firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil, + xfail: true) firstMatchTest(#"\u{65}$"#, input: eDecomposed, match: nil) // FIXME: \y is unsupported firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil, @@ -1389,12 +1429,10 @@ extension RegexTests { (eComposed, true), (eDecomposed, true)) - // FIXME: Decomposed character in regex literal doesn't match an equivalent character matchTest( #"e\u{301}$"#, (eComposed, true), - (eDecomposed, true), - xfail: true) + (eDecomposed, true)) matchTest( #"e$"#, @@ -1415,9 +1453,7 @@ extension RegexTests { (eDecomposed, true)) // \p{Letter} firstMatchTest(#"\p{Letter}$"#, input: eComposed, match: eComposed) - // FIXME: \p{Letter} doesn't match a decomposed character - firstMatchTest(#"\p{Letter}$"#, input: eDecomposed, match: eDecomposed, - xfail: true) + firstMatchTest(#"\p{Letter}$"#, input: eDecomposed, match: eDecomposed) // \d firstMatchTest(#"\d"#, input: "5", match: "5") @@ -1480,7 +1516,8 @@ extension RegexTests { firstMatchTest(#"\u{1F1F0}\u{1F1F7}"#, input: flag, match: flag) // First Unicode scalar followed by CCC of regional indicators - firstMatchTest(#"\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]"#, input: flag, match: flag) + firstMatchTest(#"\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]"#, input: flag, match: flag, + xfail: true) // FIXME: CCC of Regional Indicator doesn't match with both parts of a flag character // A CCC of regional indicators x 2 @@ -1521,8 +1558,7 @@ extension RegexTests { // FIXME: \O is unsupported firstMatchTest(#"(?u)\O\u{301}"#, input: eDecomposed, match: eDecomposed) - firstMatchTest(#"(?u)e\O"#, input: eDecomposed, match: eDecomposed, - xfail: true) + firstMatchTest(#"(?u)e\O"#, input: eDecomposed, match: eDecomposed) firstMatchTest(#"\O"#, input: eComposed, match: eComposed) firstMatchTest(#"\O"#, input: eDecomposed, match: nil, xfail: true) diff --git a/Tests/RegexTests/UTS18Tests.swift b/Tests/RegexTests/UTS18Tests.swift new file mode 100644 index 000000000..71f459a1b --- /dev/null +++ b/Tests/RegexTests/UTS18Tests.swift @@ -0,0 +1,589 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. 
and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+// This test suite includes tests that verify the behavior of `Regex` as it
+// relates to Unicode Technical Standard #18: Unicode Regular Expressions.
+//
+// Please note: Quotations of UTS18 in this file mostly use 'Character' to mean
+// Unicode code point, and 'String' to mean 'sequence of code points' — they
+// are not the Swift meanings of those terms.
+//
+// See https://unicode.org/reports/tr18/ for more.
+
+import XCTest
+@testable // for internal `matches(of:)`
+import _StringProcessing
+
+class UTS18Tests: XCTestCase {
+  var input: String {
+    "ABCdefghîøu\u{308}\u{FFF0} -–—[]123"
+    // 01234567890 1      234567890
+    //           0        10        20
+  }
+}
+
+fileprivate func regex(_ pattern: String) -> Regex<Substring> {
+  try! Regex(pattern, as: Substring.self)
+}
+
+fileprivate extension String {
+  subscript<R: RangeExpression>(pos bounds: R) -> Substring
+    where R.Bound == Int
+  {
+    let bounds = bounds.relative(to: 0..<count)
+    return dropFirst(bounds.lowerBound).prefix(bounds.count)
+  }
+}
+
+fileprivate func expectFirstMatch<Output: Equatable>(
+  _ input: String,
+  _ r: Regex<Output>,
+  _ output: Output,
+  file: StaticString = #file,
+  line: UInt = #line)
+{
+  XCTAssertEqual(input.firstMatch(of: r)?.output, output, file: file, line: line)
+}
+
+#if os(Linux)
+func XCTExpectFailure(_ message: String? = nil, body: () -> Void) {}
+#endif
+
+// MARK: - Basic Unicode Support: Level 1
+
+// C1. An implementation claiming conformance to Level 1 of this specification
+// shall meet the requirements described in the following sections:
+extension UTS18Tests {
+  // RL1.1 Hex Notation
+  //
+  // To meet this requirement, an implementation shall supply a mechanism for
+  // specifying any Unicode code point (from U+0000 to U+10FFFF), using the
+  // hexadecimal code point representation.
+  func testHexNotation() {
+    expectFirstMatch("ab", regex(#"\u{61}\u{62}"#), "ab")
+    expectFirstMatch("𝄞", regex(#"\u{1D11E}"#), "𝄞")
+  }
+
+  // 1.1.1 Hex Notation and Normalization
+  //
+  // TODO: Does this section make a recommendation?
+
+  // RL1.2 Properties
+  // To meet this requirement, an implementation shall provide at least a
+  // minimal list of properties, consisting of the following:
+  // - General_Category
+  // - Script and Script_Extensions
+  // - Alphabetic
+  // - Uppercase
+  // - Lowercase
+  // - White_Space
+  // - Noncharacter_Code_Point
+  // - Default_Ignorable_Code_Point
+  // - ANY, ASCII, ASSIGNED
+  // The values for these properties must follow the Unicode definitions, and
+  // include the property and property value aliases from the UCD. Matching of
+  // Binary, Enumerated, Catalog, and Name values must follow the Matching
+  // Rules from [UAX44] with one exception: implementations are not required
+  // to ignore an initial prefix string of "is" in property values.
+ func testProperties() { + // General_Category + expectFirstMatch(input, regex(#"\p{Lu}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{lu}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{uppercase letter}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{Uppercase Letter}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{Uppercase_Letter}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{uppercaseletter}+"#), input[pos: ..<3]) + + expectFirstMatch(input, regex(#"\p{P}+"#), "-–—[]") + expectFirstMatch(input, regex(#"\p{Pd}+"#), "-–—") + + expectFirstMatch(input, regex(#"\p{Any}+"#), input[...]) + expectFirstMatch(input, regex(#"\p{Assigned}+"#), input[pos: ..<11]) + expectFirstMatch(input, regex(#"\p{ASCII}+"#), input[pos: ..<8]) + + // Script and Script_Extensions + // U+3042 あ HIRAGANA LETTER A Hira {Hira} + XCTAssertTrue("\u{3042}".contains(regex(#"\p{Hira}"#))) + XCTAssertTrue("\u{3042}".contains(regex(#"\p{sc=Hira}"#))) + XCTAssertTrue("\u{3042}".contains(regex(#"\p{scx=Hira}"#))) + // U+30FC ー KATAKANA-HIRAGANA PROLONGED SOUND MARK Zyyy = Common {Hira, Kana} + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{Hira}"#))) // Implicit = Script_Extensions + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{Kana}"#))) + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{sc=Zyyy}"#))) // Explicit = Script + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{scx=Hira}"#))) + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{scx=Kana}"#))) + XCTAssertFalse("\u{30FC}".contains(regex(#"\p{sc=Hira}"#))) + XCTAssertFalse("\u{30FC}".contains(regex(#"\p{sc=Kana}"#))) + + // Uppercase, etc + expectFirstMatch(input, regex(#"\p{Uppercase}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{isUppercase}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{Uppercase=true}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{is Uppercase}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{is uppercase = true}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{lowercase}+"#), input[pos: 3..<11]) + expectFirstMatch(input, regex(#"\p{whitespace}+"#), input[pos: 12..<13]) + + // Block vs Writing System + let greekScalar = "Θ" // U+0398 + let greekExtendedScalar = "ἀ" // U+1F00 + XCTAssertTrue(greekScalar.contains(regex(#"\p{Greek}"#))) + XCTAssertTrue(greekExtendedScalar.contains(regex(#"\p{Greek}"#))) + } + + func testProperties_XFail() { + XCTExpectFailure("Need to support 'age' and 'block' properties") { + // XCTAssertFalse("z".contains(#/\p{age=3.1}/#)) + XCTFail(#"\(#/\p{age=3.1}/#)"#) + // XCTAssertTrue("\u{1F00}".contains(#/\p{Block=Greek}/#)) + XCTFail(#"\(#/\p{Block=Greek}/#)"#) + } + } + + // RL1.2a Compatibility Properties + // To meet this requirement, an implementation shall provide the properties + // listed in Annex C: Compatibility Properties, with the property values as + // listed there. Such an implementation shall document whether it is using + // the Standard Recommendation or POSIX-compatible properties. 
+ func testCompatibilityProperties() throws { + // FIXME: These tests seem insufficient + expectFirstMatch(input, regex(#"[[:alpha:]]+"#), input[pos: ..<11]) + expectFirstMatch(input, regex(#"[[:upper:]]+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"[[:lower:]]+"#), input[pos: 3..<11]) + expectFirstMatch(input, regex(#"[[:punct:]]+"#), input[pos: 13..<18]) + expectFirstMatch(input, regex(#"[[:digit:]]+"#), input[pos: 18..<21]) + expectFirstMatch(input, regex(#"[[:xdigit:]]+"#), input[pos: ..<6]) + expectFirstMatch(input, regex(#"[[:alnum:]]+"#), input[pos: ..<11]) + expectFirstMatch(input, regex(#"[[:space:]]+"#), input[pos: 12..<13]) + // TODO: blank + // TODO: cntrl + expectFirstMatch(input, regex(#"[[:graph:]]+"#), input[pos: ..<11]) + expectFirstMatch(input, regex(#"[[:print:]]+"#), input[...]) + expectFirstMatch(input, regex(#"[[:word:]]+"#), input[pos: ..<11]) + } + + //RL1.3 Subtraction and Intersection + // + // To meet this requirement, an implementation shall supply mechanisms for + // union, intersection and set-difference of sets of characters within + // regular expression character class expressions. + func testSubtractionAndIntersection() throws { + // Non-ASCII letters + expectFirstMatch(input, regex(#"[\p{Letter}--\p{ASCII}]+"#), input[pos: 8..<11]) + // Digits that aren't 1 or 2 + expectFirstMatch(input, regex(#"[\p{digit}--[12]]+"#), input[pos: 20..<21]) + + // ASCII-only letters + expectFirstMatch(input, regex(#"[\p{Letter}&&\p{ASCII}]+"#), input[pos: ..<8]) + // Digits that are 2 or 3 + expectFirstMatch(input, regex(#"[\p{digit}&&[23]]+"#), input[pos: 19..<21]) + + // Non-ASCII lowercase + non-lowercase ASCII + expectFirstMatch(input, regex(#"[\p{lowercase}~~\p{ascii}]+"#), input[pos: ..<3]) + XCTAssertTrue("123%&^ABC".contains(regex(#"^[\p{lowercase}~~\p{ascii}]+$"#))) + } + + func testSubtractionAndIntersectionPrecedence() { + expectFirstMatch("ABC123-", regex(#"[[:alnum:]]*-"#), "ABC123-") + expectFirstMatch("ABC123-", regex(#"[[:alnum:]--\p{Uppercase}]*-"#), "123-") + // Union binds more closely than difference + expectFirstMatch("ABC123-", regex(#"[[:alnum:]--\p{Uppercase}[:digit:]]*-"#), "-") + // TODO: Test for intersection precedence + } + + // RL1.4 Simple Word Boundaries + // To meet this requirement, an implementation shall extend the word boundary + // mechanism so that: + // - The class of includes all the Alphabetic values from the + // Unicode character database, from UnicodeData.txt, plus the decimals + // (General_Category=Decimal_Number, or equivalently Numeric_Type=Decimal), + // and the U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER + // (Join_Control=True). See also Annex C: Compatibility Properties. + // - Nonspacing marks are never divided from their base characters, and + // otherwise ignored in locating boundaries. + func testSimpleWordBoundaries() { + let simpleWordRegex = regex(#".+?\b"#).wordBoundaryKind(.unicodeLevel1) + expectFirstMatch(input, simpleWordRegex, input[pos: ..<11]) + expectFirstMatch("don't", simpleWordRegex, "don") + expectFirstMatch("Cafe\u{301}", simpleWordRegex, "Café") + } + + // RL1.5 Simple Loose Matches + // + // To meet this requirement, if an implementation provides for case- + // insensitive matching, then it shall provide at least the simple, default + // Unicode case-insensitive matching, and specify which properties are closed + // and which are not. 
+ // + // To meet this requirement, if an implementation provides for case + // conversions, then it shall provide at least the simple, default Unicode + // case folding. + func testSimpleLooseMatches() { + expectFirstMatch("Dåb", regex(#"Dåb"#).ignoresCase(), "Dåb") + expectFirstMatch("dÅB", regex(#"Dåb"#).ignoresCase(), "dÅB") + expectFirstMatch("D\u{212B}B", regex(#"Dåb"#).ignoresCase(), "D\u{212B}B") + } + + func testSimpleLooseMatches_XFail() { + XCTExpectFailure("Need case folding support") { + let sigmas = "σΣς" + expectFirstMatch(sigmas, regex(#"σ+"#).ignoresCase(), sigmas[...]) + expectFirstMatch(sigmas, regex(#"Σ+"#).ignoresCase(), sigmas[...]) + expectFirstMatch(sigmas, regex(#"ς+"#).ignoresCase(), sigmas[...]) + + // TODO: Test German sharp S + // TODO: Test char classes, e.g. [\p{Block=Phonetic_Extensions} [A-E]] + } + } + + // RL1.6 Line Boundaries + // + // To meet this requirement, if an implementation provides for line-boundary + // testing, it shall recognize not only CRLF, LF, CR, but also NEL (U+0085), + // PARAGRAPH SEPARATOR (U+2029) and LINE SEPARATOR (U+2028). + func testLineBoundaries() { + let lineInput = """ + 01 + 02\r\ + 03\n\ + 04\u{a}\ + 05\u{b}\ + 06\u{c}\ + 07\u{d}\ + 08\u{d}\u{a}\ + 09\u{85}\ + 10\u{2028}\ + 11\u{2029}\ + + """ + // Check the input counts + var lines = lineInput.matches(of: regex(#"\d{2}"#)) + XCTAssertEqual(lines.count, 11) + // Test \R - newline sequence + lines = lineInput.matches(of: regex(#"\d{2}\R"#)) + XCTAssertEqual(lines.count, 11) + // Test anchors as line boundaries + lines = lineInput.matches(of: regex(#"^\d{2}$"#).anchorsMatchLineEndings()) + XCTAssertEqual(lines.count, 11) + // Test that dot does not match line endings + lines = lineInput.matches(of: regex(#".+"#)) + XCTAssertEqual(lines.count, 11) + + // Does not contain an empty line + XCTAssertFalse(lineInput.contains(regex(#"^$"#))) + // Does contain an empty line (between \n and \r, which are reversed here) + let empty = "\n\r" + XCTAssertTrue(empty.contains(regex(#"^$"#).anchorsMatchLineEndings())) + } + + // RL1.7 Supplementary Code Points + // + // To meet this requirement, an implementation shall handle the full range of + // Unicode code points, including values from U+FFFF to U+10FFFF. In + // particular, where UTF-16 is used, a sequence consisting of a leading + // surrogate followed by a trailing surrogate shall be handled as a single + // code point in matching. + func testSupplementaryCodePoints() { + XCTAssertTrue("👍".contains(regex(#"\u{1F44D}"#))) + XCTAssertTrue("👍".contains(regex(#"[\u{1F440}-\u{1F44F}]"#))) + XCTAssertTrue("👍👎".contains(regex(#"^[\u{1F440}-\u{1F44F}]+$"#))) + } +} + +// MARK: - Extended Unicode Support: Level 2 + +// C2. An implementation claiming conformance to Level 2 of this specification +// shall satisfy C1, and meet the requirements described in the following +// sections: +extension UTS18Tests { + // RL2.1 Canonical Equivalents + // + // Specific recommendation? 
+ func testCanonicalEquivalents() { + let equivalents = [ + "\u{006f}\u{031b}\u{0323}", // o + horn + dot_below + "\u{006f}\u{0323}\u{031b}", // o + dot_below + horn + "\u{01a1}\u{0323}", // o-horn + dot_below + "\u{1ecd}\u{031b}", // o-dot_below + horn + "\u{1ee3}", // o-horn-dot_below + ] + + let regexes = [ + regex(#"\u{006f}\u{031b}\u{0323}"#), // o + horn + dot_below + regex(#"\u{006f}\u{0323}\u{031b}"#), // o + dot_below + horn + regex(#"\u{01a1}\u{0323}"#), // o-horn + dot_below + regex(#"\u{1ecd}\u{031b}"#), // o-dot_below + horn + regex(#"\u{1ee3}"#), // o-horn-dot_below + ] + + // Default: Grapheme cluster semantics + for (regexNum, regex) in regexes.enumerated() { + for (equivNum, equiv) in equivalents.enumerated() { + XCTAssertTrue( + equiv.contains(regex), + "Grapheme cluster semantics: Regex \(regexNum) didn't match with string \(equivNum)") + } + } + + // Unicode scalar semantics + for (regexNum, regex) in regexes.enumerated() { + for (equivNum, equiv) in equivalents.enumerated() { + let regex = regex.matchingSemantics(.unicodeScalar) + if regexNum == equivNum { + XCTAssertTrue( + equiv.contains(regex), + "Unicode scalar semantics: Regex \(regexNum) didn't match with string \(equivNum)") + } else { + XCTAssertFalse( + equiv.contains(regex), + "Unicode scalar semantics: Regex \(regexNum) incorrectly matched with string \(equivNum)") + } + } + } + } + + // RL2.2 Extended Grapheme Clusters and Character Classes with Strings + // + // To meet this requirement, an implementation shall provide a mechanism for + // matching against an arbitrary extended grapheme cluster, Character Classes + // with Strings, and extended grapheme cluster boundaries. + func testExtendedGraphemeClusters() { + XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef.$"#))) + XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef\X$"#))) + XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef\X$"#).matchingSemantics(.unicodeScalar))) + XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef.+\y"#).matchingSemantics(.unicodeScalar))) + } + + func testCharacterClassesWithStrings() { + let regex = regex(#"[a-z🧐🇧🇪🇧🇫🇧🇬]"#) + XCTAssertTrue("🧐".contains(regex)) + XCTAssertTrue("🇧🇫".contains(regex)) + } + + // RL2.3 Default Word Boundaries + // + // To meet this requirement, an implementation shall provide a mechanism for + // matching Unicode default word boundaries. + func testDefaultWordBoundaries() { + XCTExpectFailure { XCTFail("Implement tests") } + } + + // RL2.4 Default Case Conversion + // + // To meet this requirement, if an implementation provides for case + // conversions, then it shall provide at least the full, default Unicode case + // folding. + func testDefaultCaseConversion() { + XCTExpectFailure { XCTFail("Implement tests") } + } + + // RL2.5 Name Properties + // + // To meet this requirement, an implementation shall support individually + // named characters. 
+ func testNameProperty_XFail() { + XCTExpectFailure("Need \\p{name=...} support") { + XCTFail(#"\(#/\p{name=BOM}/#)"#) + // Name property + // XCTAssertTrue("\u{FEFF}".contains(#/\p{name=ZERO WIDTH NO-BREAK SPACE}/#)) + // Name property and Matching Rules + // XCTAssertTrue("\u{FEFF}".contains(#/\p{name=zerowidthno breakspace}/#)) + // Name_Alias property + // XCTAssertTrue("\u{FEFF}".contains(#/\p{name=BYTE ORDER MARK}/#)) + // Name_Alias property (again) + // XCTAssertTrue("\u{FEFF}".contains(#/\p{name=BOM}/#)) + + // Computed name + // XCTAssertTrue("강".contains(#/\p{name=HANGUL SYLLABLE GANG}/#)) + + // Control character + // XCTAssertTrue("\u{7}".contains(#/\p{name=BEL}/#)) + // Graphic symbol + // XCTAssertTrue("\u{1F514}".contains(#/\p{name=BELL}/#)) + } + } + + func testIndividuallyNamedCharacters() { + XCTAssertTrue("\u{263A}".contains(regex(#"\N{WHITE SMILING FACE}"#))) + XCTAssertTrue("\u{3B1}".contains(regex(#"\N{GREEK SMALL LETTER ALPHA}"#))) + XCTAssertTrue("\u{10450}".contains(regex(#"\N{SHAVIAN LETTER PEEP}"#))) + + XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{ZERO WIDTH NO-BREAK SPACE}"#))) + XCTAssertTrue("강".contains(regex(#"\N{HANGUL SYLLABLE GANG}"#))) + XCTAssertTrue("\u{1F514}".contains(regex(#"\N{BELL}"#))) + XCTAssertTrue("🐯".contains(regex(#"\N{TIGER FACE}"#))) + XCTAssertFalse("🐯".contains(regex(#"\N{TIEGR FACE}"#))) + + // Loose matching + XCTAssertTrue("\u{263A}".contains(regex(#"\N{whitesmilingface}"#))) + XCTAssertTrue("\u{263A}".contains(regex(#"\N{wHiTe_sMiLiNg_fAcE}"#))) + XCTAssertTrue("\u{263A}".contains(regex(#"\N{White Smiling-Face}"#))) + XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{zerowidthno breakspace}"#))) + + // Matching semantic level + XCTAssertFalse("👩‍👩‍👧‍👦".contains(regex(#".\N{ZERO WIDTH JOINER}"#))) + XCTAssertTrue("👩‍👩‍👧‍👦".contains(regex(#"(?u).\N{ZERO WIDTH JOINER}"#))) + } + + func testIndividuallyNamedCharacters_XFail() { + XCTExpectFailure("Need to support named chars in custom character classes") { + XCTFail("\(regex(#"[\N{GREEK SMALL LETTER ALPHA}-\N{GREEK SMALL LETTER BETA}]+"#))") + // XCTAssertTrue("^\u{3B1}\u{3B2}$".contains(#/[\N{GREEK SMALL LETTER ALPHA}-\N{GREEK SMALL LETTER BETA}]+/#)) + } + + XCTExpectFailure("Other named char failures -- investigate") { + XCTAssertTrue("\u{C}".contains(regex(#"\N{FORM FEED}"#))) + XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{BYTE ORDER MARK}"#))) + XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{BOM}"#))) + XCTAssertTrue("\u{7}".contains(regex(#"\N{BEL}"#))) + } + + XCTExpectFailure("Need to recognize invalid names at compile time") { + XCTFail("This should be a compilation error, not a match failure:") + XCTAssertFalse("abc".contains(regex(#"\N{NOT AN ACTUAL CHARACTER NAME}"#))) + } + } + + // RL2.6 Wildcards in Property Values + // + // To meet this requirement, an implementation shall support wildcards in + // Unicode property values. + func testWildcardsInPropertyValues() { + XCTExpectFailure { XCTFail("Implement tests") } + } + + // RL2.7 Full Properties + // + // To meet this requirement, an implementation shall support all of the + // properties listed below that are in the supported version of the Unicode + // Standard (or Unicode Technical Standard, respectively), with values that + // match the Unicode definitions for that version. 
+ func testFullProperties() { + // MARK: General + // Name (Name_Alias) + // Block + // Age + // General_Category + // Script (Script_Extensions) + // White_Space + // Alphabetic + // Hangul_Syllable_Type + // Noncharacter_Code_Point + // Default_Ignorable_Code_Point + // Deprecated + // Logical_Order_Exception + // Variation_Selector + + // MARK: Numeric + // Numeric_Value + // Numeric_Type + // Hex_Digit + // ASCII_Hex_Digit + + // MARK: Identifiers + // ID_Continue + // ID_Start + // XID_Continue + // XID_Start + // Pattern_Syntax + // Pattern_White_Space + // Identifier_Status + // Identifier_Type + + // MARK: CJK + // Ideographic + // Unified_Ideograph + // Radical + // IDS_Binary_Operator + // IDS_Trinary_Operator + // Equivalent_Unified_Ideograph + XCTExpectFailure { + XCTFail(#"Unsupported: \(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#)"#) + // XCTAssertTrue("⼚⺁厂".contains(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#)) + } + + // MARK: Case + // Uppercase + // Lowercase + // Simple_Lowercase_Mapping + // Simple_Titlecase_Mapping + // Simple_Uppercase_Mapping + // Simple_Case_Folding + // Soft_Dotted + // Cased + // Case_Ignorable + // Changes_When_Lowercased + // Changes_When_Uppercased + XCTAssertTrue("a".contains(regex(#"\p{Changes_When_Uppercased}"#))) + XCTAssertTrue("a".contains(regex(#"\p{Changes_When_Uppercased=true}"#))) + XCTAssertFalse("A".contains(regex(#"\p{Changes_When_Uppercased}"#))) + // Changes_When_Titlecased + // Changes_When_Casefolded + // Changes_When_Casemapped + + // MARK: Normalization + // Canonical_Combining_Class + // Decomposition_Type + // NFC_Quick_Check + // NFKC_Quick_Check + // NFD_Quick_Check + // NFKD_Quick_Check + // NFKC_Casefold + // Changes_When_NFKC_Casefolded + + // MARK: Emoji + // Emoji + // Emoji_Presentation + // Emoji_Modifier + // Emoji_Modifier_Base + // Emoji_Component + // Extended_Pictographic + // Basic_Emoji* + // Emoji_Keycap_Sequence* + // RGI_Emoji_Modifier_Sequence* + // RGI_Emoji_Flag_Sequence* + // RGI_Emoji_Tag_Sequence* + // RGI_Emoji_ZWJ_Sequence* + // RGI_Emoji* + + // MARK: Shaping and Rendering + // Join_Control + // Joining_Group + // Joining_Type + // Vertical_Orientation + // Line_Break + // Grapheme_Cluster_Break + // Sentence_Break + // Word_Break + // East_Asian_Width + // Prepended_Concatenation_Mark + + // MARK: Bidirectional + // Bidi_Class + // Bidi_Control + // Bidi_Mirrored + // Bidi_Mirroring_Glyph + // Bidi_Paired_Bracket + // Bidi_Paired_Bracket_Type + + // MARK: Miscellaneous + // Math + // Quotation_Mark + // Dash + // Sentence_Terminal + // Terminal_Punctuation + // Diacritic + // Extender + // Grapheme_Base + // Grapheme_Extend + // Regional_Indicator + } +}