Skip to content

Commit 663d8c2

Browse files
committed
Document Skip() even more.
1 parent a7aeb0c commit 663d8c2

File tree

3 files changed

+38
-10
lines changed

3 files changed

+38
-10
lines changed

Sources/Patterns/Operations on Patterns/Skip.swift

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
/// g.skip <- Skip()
3030
/// }
3131
/// ```
32-
/// because in grammars the subexpressions are called, like functions, not "inlined", like Swift variables.
32+
/// because in grammars the subexpressions are _called_, like functions, not "_inlined_", like Swift variables.
3333
/// So the `Skip()` in `g.skip` can't tell what will come after it.
3434
public struct Skip<Input: BidirectionalCollection>: Pattern where Input.Element: Hashable {
3535
public var description: String { "Skip()" }
@@ -54,20 +54,33 @@ extension ContiguousArray {
5454
@_specialize(where Input == String.UTF8View, Element == Instruction<String.UTF8View>)
5555
@usableFromInline
5656
mutating func replaceSkips<Input>() where Element == Instruction<Input> {
57-
// `setupSkip(at: i)` adds 1 new instruction somewhere after `ì`, so we cant loop over self.indices directly
57+
// `setupSkip(at: i)` adds 1 new instruction somewhere after `i`, so we can't loop over self.indices directly.
5858
var i = self.startIndex
59-
repeat {
59+
while i < self.endIndex {
6060
switch self[i] {
6161
case .skip:
6262
self.setupSkip(at: i)
6363
default: break
6464
}
6565
self.formIndex(after: &i)
66-
} while i < self.endIndex
66+
}
6767
}
6868

6969
/// Replaces the dummy `.skip` instruction at `skipIndex` with one that will search using the instructions
7070
/// right after `skipIndex`.
71+
///
72+
/// In other words, we look at the instructions right after the .skip and see if they can be searched for
73+
/// efficiently.
74+
///
75+
/// Also places a .choice right after the search instruction replacing the .skip, and a corresponding .commit
76+
/// somewhere after that again. So if the search succeeds, but a later instruction fails, we can start a new
77+
/// search one step ahead from where the previous search succeeded.
78+
/// In the sub-pattern `Skip() • "abc" • letter • Skip() • "xyz"`, if "abc" succeeds, but there is no
79+
/// letter afterwards, we search for "abc" again from the "b". But if there is "abc" and another letter,
80+
/// we don't search for "abc" again because the next instruction is another .skip, and if we can't find "xyz"
81+
/// further on there's no point in searching for "abc" again.
82+
///
83+
/// See `placeSkipCommit` for more.
7184
@usableFromInline
7285
mutating func setupSkip<Input>(at skipIndex: Index) where Element == Instruction<Input> {
7386
let afterSkip = skipIndex + 1
@@ -77,7 +90,7 @@ extension ContiguousArray {
7790
input[index...].indices.first(where: { function(input, $0) })
7891
?? (function(input, input.endIndex) ? input.endIndex : nil)
7992
}
80-
self[afterSkip] = .choice(offset: -1, atIndexOffset: 1)
93+
self[afterSkip] = .choice(offset: -1, atIndexOffset: +1)
8194
case .checkIndex(_, atIndexOffset: _):
8295
// A `.checkIndex` will only have a non-zero offset if it has been moved by `moveMovablesForward`,
8396
// and that will never move anything beyond a `.skip`.
@@ -105,6 +118,7 @@ extension ContiguousArray {
105118
}
106119
self[afterSkip] = .choice(offset: -1, atIndexOffset: 0)
107120
} else {
121+
// More than one literal, use Boyer–Moore–Horspool search.
108122
let cache = SearchCache(elements)
109123
self[skipIndex] = .search { input, index in
110124
input.range(of: cache, from: index)?.upperBound
@@ -113,13 +127,25 @@ extension ContiguousArray {
113127
self[afterSkip + 1] = .jump(offset: elements.count - 1)
114128
}
115129
default:
130+
// Could not find instructions to search for efficiently,
131+
// so we just try them and if they fail we move one step forward and try again.
116132
self[skipIndex] = .choice(offset: 0, atIndexOffset: +1)
117133
self.placeSkipCommit(startSearchFrom: skipIndex + 1)
118134
return
119135
}
120136
self.placeSkipCommit(startSearchFrom: skipIndex + 2)
121137
}
122138

139+
/// Places a `.commit` after replacing a `.skip`.
140+
///
141+
/// Any instruction replacing a .skip will have a .choice right after it.
142+
/// We place the corresponding .commit as far after it as possible.
143+
/// As always we have to make sure that no pairs of corresponding .choice (or other instruction) and .commit
144+
/// intersect with any other pair.
145+
///
146+
/// So we have to jump over any optional repetition (`¿+*` and `.repeat(range)`) and any `/` choice patterns.
147+
/// All of them use the `.choice` instruction.
148+
/// If we are inside any of these we put the .commit at the end of our part of the pattern.
123149
@usableFromInline
124150
mutating func placeSkipCommit<Input>(startSearchFrom: Index) where Element == Instruction<Input> {
125151
var i = startSearchFrom
@@ -128,7 +154,7 @@ extension ContiguousArray {
128154
case let .choice(_, indexOffset) where indexOffset < 0:
129155
fatalError("Not implemented.")
130156
case let .choice(offset, _):
131-
// Follow every choice offset.
157+
// We jump over this entire sub-pattern.
132158
// If one step back there is a jump forwards, then it's a '/' pattern. So follow that jump too.
133159
if case let .jump(jumpOffset) = self[i + offset - 1], jumpOffset > 0 {
134160
i += offset - 1 + jumpOffset
@@ -140,6 +166,7 @@ extension ContiguousArray {
140166
case .elementEquals, .checkElement, .checkIndex, .moveIndex, .captureStart, .captureEnd, .call, .jump:
141167
i += 1
142168
case .commit, .choiceEnd, .return, .match, .skip, .search, .fail:
169+
// This is as far as we can go.
143170
insertInstructions(.commit, at: i)
144171
return
145172
case .openCall:
@@ -158,7 +185,7 @@ extension ContiguousArray {
158185
where Element == Instruction<Input> {
159186
insert(contentsOf: newInstructions, at: location)
160187
let insertedRange = location ..< (location + newInstructions.count + 1)
161-
/// instruction ... location ... offsetTarget
188+
// instruction ... location ... offsetTarget
162189
for i in startIndex ..< insertedRange.lowerBound {
163190
switch self[i] {
164191
case let .call(offset) where offset > (location - i):
@@ -171,7 +198,7 @@ extension ContiguousArray {
171198
break
172199
}
173200
}
174-
/// offsetTarget ... location ... instruction
201+
// offsetTarget ... location ... instruction
175202
for i in insertedRange.upperBound ..< endIndex {
176203
switch self[i] {
177204
case let .call(offset) where offset < (location - i):

Sources/Patterns/Pattern And Instruction.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
public protocol Pattern: CustomStringConvertible {
1010
associatedtype Input: BidirectionalCollection where Input.Element: Hashable
1111
typealias ParsedRange = Range<Input.Index>
12+
// Ideally this should be used by all implementors, but that sometimes causes a compiler crash (Swift 5.3 beta)
1213
typealias Instructions = ContiguousArray<Instruction<Input>>
1314

1415
/// Appends Instructions for the Parser to `instructions`.

Sources/Patterns/VMBacktrack.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ extension VMEngine {
9494
@usableFromInline
9595
func launch(input: Input, thread: Thread) -> Thread? {
9696
var stack = ContiguousArray<Thread>()[...]
97-
9897
stack.append(thread)
98+
9999
while var thread = stack.popLast() {
100100
assert(!thread.isReturnAddress, "Stack unexpectedly contains .returnAddress after fail")
101101
defer { // Fail, when `break loop` is called.
@@ -147,7 +147,7 @@ extension VMEngine {
147147
#if DEBUG
148148
let entry = stack.popLast()
149149
assert(entry != nil, "Empty stack during .commit")
150-
assert(entry.map { !$0.isReturnAddress } ?? true, "Missing thread during .cancelLastSplit")
150+
assert(entry.map { !$0.isReturnAddress } ?? true, "Missing thread during .commit")
151151
#else
152152
stack.removeLast()
153153
#endif

0 commit comments

Comments
 (0)