diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 66fefc49e..53c91bcca 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -217,27 +217,16 @@ fileprivate extension Compiler.ByteCodeGen { case .graphemeCluster: builder.buildAdvance(1) case .unicodeScalar: - // TODO: builder.buildAdvanceUnicodeScalar(1) - builder.buildConsume { input, bounds in - input.unicodeScalars.index(after: bounds.lowerBound) - } + builder.buildAdvanceUnicodeScalar(1) } } mutating func emitAnyNonNewline() { switch options.semanticLevel { case .graphemeCluster: - builder.buildConsume { input, bounds in - input[bounds.lowerBound].isNewline - ? nil - : input.index(after: bounds.lowerBound) - } + builder.buildConsumeNonNewline() case .unicodeScalar: - builder.buildConsume { input, bounds in - input[bounds.lowerBound].isNewline - ? nil - : input.unicodeScalars.index(after: bounds.lowerBound) - } + builder.buildConsumeScalarNonNewline() } } diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index 1e2ed757b..9db204250 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -196,11 +196,19 @@ extension Instruction.Payload { interpret() } - init(distance: Distance) { - self.init(distance) + init(distance: Distance, isScalarDistance: Bool = false) { + self.init(isScalarDistance ? 1 : 0, distance) } - var distance: Distance { - interpret() + var distance: (isScalarDistance: Bool, Distance) { + let pair: (UInt64, Distance) = interpretPair() + return (isScalarDistance: pair.0 == 1, pair.1) + } + + init(isScalar: Bool) { + self.init(isScalar ? 
1 : 0) + } + var isScalar: Bool { + self.rawValue == 1 } init(bool: BoolRegister) { diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift index a41d2f4af..21ab90a03 100644 --- a/Sources/_StringProcessing/Engine/Instruction.swift +++ b/Sources/_StringProcessing/Engine/Instruction.swift @@ -122,6 +122,10 @@ extension Instruction { /// - If it is inverted /// - If it strictly matches only ascii values case matchBuiltin + + /// Matches any non newline character + /// Operand: If we are in scalar mode or not + case matchAnyNonNewline // MARK: Extension points diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 959b1507e..884ed47ab 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -142,6 +142,19 @@ extension MEProgram.Builder { mutating func buildAdvance(_ n: Distance) { instructions.append(.init(.advance, .init(distance: n))) } + + mutating func buildAdvanceUnicodeScalar(_ n: Distance) { + instructions.append( + .init(.advance, .init(distance: n, isScalarDistance: true))) + } + + mutating func buildConsumeNonNewline() { + instructions.append(.init(.matchAnyNonNewline, .init(isScalar: false))) + } + + mutating func buildConsumeScalarNonNewline() { + instructions.append(.init(.matchAnyNonNewline, .init(isScalar: true))) + } mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) { instructions.append(.init( diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index a62c1e070..a5a59b863 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -181,6 +181,18 @@ extension Processor { currentPosition = idx return true } + + // Advances in unicode scalar view + mutating func consumeScalar(_ n: Distance) -> Bool { + guard let idx = 
input.unicodeScalars.index( + currentPosition, offsetBy: n.rawValue, limitedBy: end + ) else { + signalFailure() + return false + } + currentPosition = idx + return true + } /// Continue matching at the specified index. /// @@ -321,6 +333,26 @@ extension Processor { return true } + // Matches the next character if it is not a newline + mutating func matchAnyNonNewline() -> Bool { + guard let c = load(), !c.isNewline else { + signalFailure() + return false + } + _uncheckedForcedConsumeOne() + return true + } + + // Matches the next scalar if it is not a newline + mutating func matchAnyNonNewlineScalar() -> Bool { + guard let s = loadScalar(), !s.isNewline else { + signalFailure() + return false + } + input.unicodeScalars.formIndex(after: &currentPosition) + return true + } + mutating func signalFailure() { guard !savePoints.isEmpty else { state = .fail @@ -469,10 +501,26 @@ extension Processor { signalFailure() case .advance: - if consume(payload.distance) { - controller.step() + let (isScalar, distance) = payload.distance + if isScalar { + if consumeScalar(distance) { + controller.step() + } + } else { + if consume(distance) { + controller.step() + } + } + case .matchAnyNonNewline: + if payload.isScalar { + if matchAnyNonNewlineScalar() { + controller.step() + } + } else { + if matchAnyNonNewline() { + controller.step() + } } - case .match: let (isCaseInsensitive, reg) = payload.elementPayload if isCaseInsensitive { diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift index 54fc3b561..752921e19 100644 --- a/Tests/RegexTests/CompileTests.swift +++ b/Tests/RegexTests/CompileTests.swift @@ -37,6 +37,7 @@ enum DecodedInstr { case matchScalarCaseInsensitive case matchScalarUnchecked case matchBitsetScalar + case matchAnyNonNewline case matchBitset case matchBuiltin case consumeBy @@ -116,7 +117,9 @@ extension DecodedInstr { return .matchBitset } case .consumeBy: - return consumeBy + return .consumeBy + case .matchAnyNonNewline: + return 
.matchAnyNonNewline case .assertBy: return .assertBy case .matchBy: