Skip to content

Commit bf7c392

Browse files
committed
sink match bitset
1 parent 3c87596 commit bf7c392

File tree

2 files changed

+28
-10
lines changed

2 files changed

+28
-10
lines changed

Sources/_StringProcessing/Engine/MEQuantify.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ extension Processor {
33
var next: Input.Index?
44
switch payload.type {
55
case .bitset:
6-
next = _doMatchBitset(registers[payload.bitset])
6+
next = input.matchBitset(
7+
registers[payload.bitset], at: currentPosition, limitedBy: end)
78
case .asciiChar:
89
next = input.matchScalar(
910
UnicodeScalar.init(_value: UInt32(payload.asciiChar)),

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ extension Processor {
243243
}
244244

245245
mutating func matchCaseInsensitive(_ e: Element) -> Bool {
246+
// TODO: needs benchmark coverage
246247
guard let cur = load(), cur.lowercased() == e.lowercased() else {
247248
signalFailure()
248249
return false
@@ -290,6 +291,7 @@ extension Processor {
290291
_ s: Unicode.Scalar,
291292
boundaryCheck: Bool
292293
) -> Bool {
294+
// TODO: needs benchmark coverage
293295
guard let curScalar = loadScalar(),
294296
s.properties.lowercaseMapping == curScalar.properties.lowercaseMapping,
295297
let idx = input.unicodeScalars.index(
@@ -305,21 +307,15 @@ extension Processor {
305307
return true
306308
}
307309

308-
func _doMatchBitset(_ bitset: DSLTree.CustomCharacterClass.AsciiBitset) -> Input.Index? {
309-
if let cur = load(), bitset.matches(char: cur) {
310-
return input.index(after: currentPosition)
311-
} else {
312-
return nil
313-
}
314-
}
315-
316310
// If we have a bitset we know that the CharacterClass only matches against
317311
// ascii characters, so check if the current input element is ascii then
318312
// check if it is set in the bitset
319313
mutating func matchBitset(
320314
_ bitset: DSLTree.CustomCharacterClass.AsciiBitset
321315
) -> Bool {
322-
guard let next = _doMatchBitset(bitset) else {
316+
guard let next = input.matchBitset(
317+
bitset, at: currentPosition, limitedBy: end
318+
) else {
323319
signalFailure()
324320
return false
325321
}
@@ -748,4 +744,25 @@ extension String {
748744
return idx
749745
}
750746

747+
/// Tries to match the character at `pos` against an ASCII bitset.
///
/// - Parameters:
///   - bitset: The bitset the character at `pos` is tested against.
///   - pos: The position in the string at which to attempt the match.
///   - end: The upper bound for the match; asserted to be no greater than
///     `endIndex`.
/// - Returns: The index immediately after the matched character, or `nil`
///   when `pos` is not before `end`, when the bitset does not match the
///   character at `pos`, or when the following index would lie past `end`.
func matchBitset(
  _ bitset: DSLTree.CustomCharacterClass.AsciiBitset,
  at pos: Index,
  limitedBy end: Index
) -> Index? {
  // TODO: extremely quick-check-able
  // TODO: can be sped up with string internals

  assert(end <= endIndex)

  // The bounds check must come first: `self[pos]` is only read once `pos`
  // is known to be strictly before `end`.
  if pos >= end { return nil }
  if !bitset.matches(char: self[pos]) { return nil }

  // Reject a match whose character extends beyond the permitted range.
  let successor = index(after: pos)
  return successor <= end ? successor : nil
}
766+
767+
751768
}

0 commit comments

Comments
 (0)