Skip to content

Commit 41fcc24

Browse files
authored
Merge pull request #40 from kareman/addSkipTests
Add `Skip` tests
2 parents 04927a7 + 1101129 commit 41fcc24

File tree

9 files changed

+274
-68
lines changed

9 files changed

+274
-68
lines changed

Sources/Patterns/Grammar.swift

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,22 +49,33 @@ public class Grammar: Pattern {
4949
var instructions = finalInstructions
5050
let startIndex = instructions.endIndex
5151
instructions.append(
52-
.openCall(name: try firstPattern ?? Parser<Input>.InitError.message("Grammar is empty")))
52+
.openCall(name: try firstPattern ?? Parser<Input>.InitError.message("Grammar is empty.")))
5353
instructions.append(.jump(offset: .max)) // replaced later
54-
var callTable = [String: Instructions.Index]()
54+
var callTable = [String: Range<Instructions.Index>]()
5555
for (name, pattern) in patterns {
56-
callTable[name] = instructions.endIndex
56+
let startIndex = instructions.endIndex
5757
try pattern.createInstructions(&instructions)
58-
precondition(callTable[name] != instructions.endIndex,
59-
"Pattern '\(name) <- \(pattern)' was empty")
6058
instructions.append(.return)
59+
guard (startIndex ..< instructions.endIndex).count > 1 else {
60+
throw Parser<Input>.InitError.message("Pattern '\(name) <- \(pattern)' was empty.")
61+
}
62+
callTable[name] = startIndex ..< instructions.endIndex
6163
}
6264

6365
for i in instructions.indices[startIndex...] {
6466
if case let .openCall(name) = instructions[i] {
65-
let address = try callTable[name]
66-
?? Parser<Input>.InitError.message("Pattern '\(name)' was never defined with ´<-´ operator.")
67-
instructions[i] = .call(offset: address - i)
67+
guard let subpatternRange = callTable[name] else {
68+
throw Parser<Input>.InitError.message("Pattern '\(name)' was never defined with ´<-´ operator.")
69+
}
70+
// If the last instruction in a subpattern, ignoring a trailing dummy (e.g. .choiceEnd), is a call to itself we perform
71+
// a tail call optimisation by jumping directly instead.
72+
// The very last instruction is a .return, so skip that.
73+
if subpatternRange.upperBound - 2 == i
74+
|| (subpatternRange.upperBound - 3 == i && instructions[i + 1].doesNotDoAnything) {
75+
instructions[i] = .jump(offset: subpatternRange.lowerBound - i)
76+
} else {
77+
instructions[i] = .call(offset: subpatternRange.lowerBound - i)
78+
}
6879
}
6980
}
7081
instructions[startIndex + 1] = .jump(offset: instructions.endIndex - startIndex - 1)

Sources/Patterns/Operations on Patterns/Skip.swift

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,12 @@ public struct Skip: Pattern {
1313
@inlinable
1414
public func createInstructions(_ instructions: inout Instructions) throws {
1515
instructions.append(.skip)
16-
instructions.append(.jump(offset: 1)) // dummy
1716
}
1817
}
1918

2019
import SE0270_RangeSet
2120

22-
extension MutableCollection where Self: RandomAccessCollection, Index == Int {
21+
extension MutableCollection where Self: RandomAccessCollection, Self: RangeReplaceableCollection, Index == Int {
2322
@usableFromInline
2423
mutating func replaceSkips<Input>() where Element == Instruction<Input> {
2524
for i in self.indices {
@@ -33,7 +32,7 @@ extension MutableCollection where Self: RandomAccessCollection, Index == Int {
3332

3433
@usableFromInline
3534
mutating func setupSkip<Input>(at skipIndex: Index) where Element == Instruction<Input> {
36-
let searchablesStartAt = skipIndex + 2
35+
let searchablesStartAt = skipIndex + 1
3736
switch self[searchablesStartAt] {
3837
case let .checkIndex(function, atIndexOffset: 0):
3938
self[skipIndex] = .search { input, index in
@@ -42,7 +41,7 @@ extension MutableCollection where Self: RandomAccessCollection, Index == Int {
4241
}
4342
self[searchablesStartAt] = .choice(offset: -1, atIndexOffset: 1)
4443
case .checkIndex(_, atIndexOffset: _):
45-
fatalError("Cannot see a valid reason for a `.checkIndex` with a non-zero offset to be located right after a `.skip` instruction. Correct me if I'm wrong.")
44+
fatalError("Cannot see a valid reason for a `.checkIndex` with a non-zero offset to be located right after a `.skip` instruction.") // Correct me if I'm wrong.
4645
case let .checkElement(test):
4746
self[skipIndex] = .search { input, index in
4847
input[index...].firstIndex(where: test)
@@ -75,14 +74,14 @@ extension MutableCollection where Self: RandomAccessCollection, Index == Int {
7574
}
7675
default:
7776
self[skipIndex] = .choice(offset: 0, atIndexOffset: +1)
78-
self.placeSkipCommit(dummyIsAt: skipIndex + 1, startSearchFrom: skipIndex + 2)
77+
self.placeSkipCommit(startSearchFrom: skipIndex + 1)
7978
return
8079
}
81-
self.placeSkipCommit(dummyIsAt: skipIndex + 1, startSearchFrom: skipIndex + 3)
80+
self.placeSkipCommit(startSearchFrom: skipIndex + 2)
8281
}
8382

8483
@usableFromInline
85-
mutating func placeSkipCommit<Input>(dummyIsAt dummyIndex: Index, startSearchFrom: Index)
84+
mutating func placeSkipCommit<Input>(startSearchFrom: Index)
8685
where Element == Instruction<Input> {
8786
var i = startSearchFrom
8887
loop: while true {
@@ -97,17 +96,48 @@ extension MutableCollection where Self: RandomAccessCollection, Index == Int {
9796
} else {
9897
i += offset
9998
}
100-
case let .jump(offset):
99+
case let .jump(offset) where offset > 0: // If we jump backwards we are likely to enter an infinite loop.
101100
i += offset
102-
case .elementEquals, .checkElement, .checkIndex, .moveIndex, .captureStart, .captureEnd, .call:
101+
case .elementEquals, .checkElement, .checkIndex, .moveIndex, .captureStart, .captureEnd, .call, .jump:
103102
i += 1
104103
case .commit, .choiceEnd, .return, .match, .skip, .search:
105-
moveSubranges(RangeSet(dummyIndex ..< (dummyIndex + 1)), to: i)
106-
self[i - 1] = .commit
104+
insertInstructions(.commit, at: i)
107105
return
108106
case .fail, .openCall:
109107
fatalError()
110108
}
111109
}
112110
}
111+
112+
/// Inserts new instructions at `location`. Adjusts the offsets of other instructions accordingly.
113+
@usableFromInline
114+
mutating func insertInstructions<Input>(_ newInstructions: Element..., at location: Index)
115+
where Element == Instruction<Input> {
116+
insert(contentsOf: newInstructions, at: location)
117+
let insertedRange = location ..< (location + newInstructions.count + 1)
118+
for i in startIndex ..< insertedRange.lowerBound {
119+
switch self[i] {
120+
case let .call(offset) where offset > (location - i):
121+
self[i] = .call(offset: offset + newInstructions.count)
122+
case let .jump(offset) where offset > (location - i):
123+
self[i] = .jump(offset: offset + newInstructions.count)
124+
case let .choice(offset, atIndexOffset) where offset > (location - i):
125+
self[i] = .choice(offset: offset + newInstructions.count, atIndexOffset: atIndexOffset)
126+
default:
127+
break
128+
}
129+
}
130+
for i in insertedRange.upperBound ..< endIndex {
131+
switch self[i] {
132+
case let .call(offset) where offset < (location - i):
133+
self[i] = .call(offset: offset - newInstructions.count)
134+
case let .jump(offset) where offset < (location - i):
135+
self[i] = .jump(offset: offset - newInstructions.count)
136+
case let .choice(offset, atIndexOffset) where offset < (location - i):
137+
self[i] = .choice(offset: offset - newInstructions.count, atIndexOffset: atIndexOffset)
138+
default:
139+
break
140+
}
141+
}
142+
}
113143
}

Sources/Patterns/Parser.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ public struct Parser<Input: BidirectionalCollection> where Input.Element: Hashab
3333
try self.init(Skip() pattern)
3434
}
3535

36+
@inlinable
3637
public func ranges(in input: Input, from startindex: Input.Index? = nil)
3738
-> AnySequence<Range<Input.Index>> {
3839
AnySequence(matches(in: input, from: startindex).lazy.map { $0.range })

Sources/Patterns/Pattern And Instruction.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,17 @@ public enum Instruction<Input: BidirectionalCollection> where Input.Element: Has
130130
return nil
131131
}
132132
}
133+
134+
/// Returns true only if the instruction has no effect.
135+
@usableFromInline
136+
var doesNotDoAnything: Bool {
137+
switch self {
138+
case .choiceEnd, .jump(+1):
139+
return true
140+
default:
141+
return false
142+
}
143+
}
133144
}
134145

135146
extension Sequence where Element == Instruction<Pattern.Input> {

Tests/LongTests/LongTests.swift

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class LongTests: XCTestCase {
2222
}
2323

2424
func testPlaygroundExample() throws {
25-
let text = """
25+
let text = #"""
2626
0 0.0 0.01
2727
-0 +0 -0.0 +0.0
2828
-123.456e+00 -123.456E+00 -123.456e-00 -123.456E-00
@@ -33,7 +33,7 @@ class LongTests: XCTestCase {
3333
0x123E 0x123e
3434
0x0123456789abcdef
3535
0b0 0b1 0b0000 0b0001 0b11110000 0b0000_1111 0b1010_00_11
36-
"""
36+
"""#
3737

3838
let unsigned = digit+
3939
let sign = "-" / "+"
@@ -48,4 +48,93 @@ class LongTests: XCTestCase {
4848

4949
XCTAssertEqual(Array(parser.matches(in: text)).count, 44)
5050
}
51+
52+
// from http://www.inf.puc-rio.br/~roberto/docs/peg.pdf, page 2 and 5
53+
static let pegGrammar = Grammar { g in
54+
//g.all <- g.pattern • !any
55+
g.pattern <- g.grammar / g.simplepatt
56+
g.grammar <- (g.nonterminal "<-" g.sp g.simplepatt)+
57+
g.simplepatt <- g.alternative ("/" g.sp g.alternative)*
58+
g.alternative <- (OneOf("!&")¿ g.sp g.suffix)+
59+
g.suffix <- g.primary (OneOf("*+?") g.sp)*
60+
g.primary <- "(" g.sp g.pattern ")" g.sp / "." g.sp / g.literal / g.charclass / g.nonterminal !"<-"
61+
g.literal <- "" (!"" any)* "" g.sp
62+
g.charclass <- "[" (!"]" (any "-" any / any))* "]" g.sp
63+
g.nonterminal <- OneOf("a" ... "z", "A" ... "Z")+ g.sp
64+
g.sp <- OneOf(" \t\n")*
65+
}
66+
67+
static let pegGrammarParser = { try! Parser(pegGrammar) }()
68+
69+
func testPEGGrammar() throws {
70+
// page 5
71+
let grammar1Text = """
72+
pattern <- grammar / simplepatt
73+
grammar <- (nonterminal ’<-’ sp simplepatt)+
74+
simplepatt <- alternative (’/’ sp alternative)*
75+
alternative <- ([!&]? sp suffix)+
76+
suffix <- primary ([*+?] sp)*
77+
primary <- ’(’ sp pattern ’)’ sp / ’.’ sp / literal / charclass / nonterminal !’<-’
78+
literal <- [’] (![’] .)* [’] sp
79+
charclass <- ’[’ (!’]’ (. ’-’ . / . ))* ’]’ sp
80+
nonterminal <- [a-zA-Z]+ sp
81+
sp <- [ \t\n]*
82+
"""
83+
XCTAssertEqual(Self.pegGrammarParser.match(in: grammar1Text)?.endIndex, grammar1Text.endIndex)
84+
85+
// page 2
86+
let grammar2Text = """
87+
grammar <- (nonterminal ’<-’ sp pattern)+
88+
pattern <- alternative (’/’ sp alternative)*
89+
alternative <- ([!&]? sp suffix)+
90+
suffix <- primary ([*+?] sp)*
91+
primary <- ’(’ sp pattern ’)’ sp / ’.’ sp / literal / charclass / nonterminal !’<-’
92+
literal <- [’] (![’] .)* [’] sp
93+
charclass <- ’[’ (!’]’ (. ’-’ . / . ))* ’]’ sp
94+
nonterminal <- [a-zA-Z]+ sp
95+
sp <- [ \t\n]*
96+
"""
97+
XCTAssertEqual(Self.pegGrammarParser.match(in: grammar2Text)?.endIndex, grammar2Text.endIndex)
98+
}
99+
100+
func testOriginalPEGGrammar() throws {
101+
try XCTSkipIf(true, "pegGrammar does not support escaping characters.")
102+
103+
// https://bford.info/pub/lang/peg.pdf Page 2, Figure 1.
104+
let origPEGGrammarText = """
105+
# Hierarchical syntax
106+
Grammar <- Spacing Definition+ EndOfFile
107+
Definition <- Identifier LEFTARROW Expression
108+
Expression <- Sequence (SLASH Sequence)*
109+
Sequence <- Prefix*
110+
Prefix <- (AND / NOT)? Suffix
111+
Suffix <- Primary (QUESTION / STAR / PLUS)?
112+
Primary <- Identifier !LEFTARROW / OPEN Expression CLOSE / Literal / Class / DOT
113+
114+
# Lexical syntax
115+
Identifier <- IdentStart IdentCont* Spacing
116+
IdentStart <- [a-zA-Z_]
117+
IdentCont <- IdentStart / [0-9]
118+
Literal <- [’] (![’] Char)* [’] Spacing / ["] (!["] Char)* ["] Spacing
119+
Class <- ’[’ (!’]’ Range)* ’]’ Spacing
120+
Range <- Char ’-’ Char / Char
121+
Char <- ’\\’ [nrt’"[]\\] / ’\\’ [0-2][0-7][0-7] / ’\\’ [0-7][0-7]? / !’\\’ .
122+
LEFTARROW <- ’<-’ Spacing
123+
SLASH <- ’/’ Spacing
124+
AND <- ’&’ Spacing
125+
NOT <- ’!’ Spacing
126+
QUESTION <- ’?’ Spacing
127+
STAR <- ’*’ Spacing
128+
PLUS <- ’+’ Spacing
129+
OPEN <- ’(’ Spacing
130+
CLOSE <- ’)’ Spacing
131+
DOT <- ’.’ Spacing
132+
Spacing <- (Space / Comment)*
133+
Comment <- ’#’ (!EndOfLine .)* EndOfLine
134+
Space <- ’ ’ / ’\t’ / EndOfLine
135+
EndOfLine <- ’\r\n’ / ’\n’ / ’\r
136+
EndOfFile <- !.
137+
"""
138+
XCTAssertEqual(Self.pegGrammarParser.match(in: origPEGGrammarText)?.endIndex, origPEGGrammarText.endIndex)
139+
}
51140
}

Tests/PatternsTests/ConcatenationTests.swift

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -69,44 +69,6 @@ class ConcatenationTests: XCTestCase {
6969
result: ["FMA026712 TECNOAUTOMOTRIZ ATLACOMULCO S"])
7070
}
7171

72-
func testSkip1() throws {
73-
let text = "This is a test text."
74-
assertParseAll(
75-
Capture(" " Skip() " "),
76-
input: text, result: [" is ", " test "])
77-
78-
assertParseAll(
79-
Capture(" " Skip() "d"),
80-
input: " ad d", result: [" ad", " d"])
81-
}
82-
83-
func testSkipAndCapture() throws {
84-
let text = "This is a test text."
85-
assertParseAll(
86-
" " Capture(letter Skip()) " ",
87-
input: text, result: ["is", "a", "test"])
88-
assertParseAll(
89-
" " Capture(Skip() letter+) " ",
90-
input: text, result: ["is", "a", "test"])
91-
assertParseAll(
92-
" " Capture(Skip()) " ",
93-
input: text, result: ["is", "a", "test"])
94-
95-
assertParseAll(
96-
Line.start Capture(Skip()) Line.end,
97-
input: """
98-
1
99-
2
100-
101-
3
102-
""",
103-
result: ["1", "2", "", "3"])
104-
105-
// undefined (Skip at end)
106-
_ = try Parser(search: " " Capture(Skip()))
107-
.matches(in: text)
108-
}
109-
11072
func testMatchFullRange() throws {
11173
let text = """
11274
line 1

Tests/PatternsTests/GrammarTests.swift

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// Created by Kåre Morstøl on 27/05/2020.
66
//
77

8-
import Patterns
8+
@testable import Patterns
99
import XCTest
1010

1111
class GrammarTests: XCTestCase {
@@ -54,4 +54,21 @@ class GrammarTests: XCTestCase {
5454
assertParseMarkers(p, input: "1+2-3*(4+3)|")
5555
assertParseAll(p, input: "1+2(", count: 0)
5656
}
57+
58+
func testOptimisesTailCall() throws {
59+
let g = Grammar { g in
60+
g.a <- " " / Skip() g.a
61+
}
62+
63+
func isCall(_ inst: Instruction<String>) -> Bool {
64+
switch inst {
65+
case .call:
66+
return true
67+
default: return false
68+
}
69+
}
70+
71+
XCTAssertEqual(try Parser(g).matcher.instructions.filter(isCall(_:)).count, 1)
72+
XCTAssertEqual(try Parser(search: g).matcher.instructions.filter(isCall(_:)).count, 1)
73+
}
5774
}

0 commit comments

Comments
 (0)