Skip to content

Commit 07a2213

Browse files
committed
wip: quick and dirty optimizer
1 parent 938bdd8 commit 07a2213

File tree

4 files changed

+140
-12
lines changed

4 files changed

+140
-12
lines changed

Sources/Regex2BNF/Regex2BNF.swift

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,53 +24,50 @@ struct Regex2BNF: ParsableCommand {
2424
var runExamples = false
2525

2626
func convert(_ pattern: String) throws {
27-
print("\n=== /\(pattern)/ ===\n")
28-
let ast = try _RegexParser.parse(pattern, .init())
29-
print(ast)
30-
print()
27+
print("/\(pattern)/\n")
3128
print(try _printAsBNF(inputRegex: pattern))
3229
}
3330

3431
mutating func run() throws {
3532
if runExamples {
3633
// TODO: Turn into test cases
37-
print("[Examples")
34+
// print("[Examples")
3835

39-
print("Single-scalar character literals:")
36+
// print("Single-scalar character literals:")
4037
try convert("a")
4138
try convert("Z")
4239
try convert("")
4340
try convert("")
4441
try convert("\u{301}")
4542

4643

47-
print("Multi-scalar character literals")
44+
// print("Multi-scalar character literals")
4845
try convert("🧟‍♀️")
4946
try convert("e\u{301}")
5047

51-
print("Simple alternations")
48+
// print("Simple alternations")
5249
try convert("a|b")
5350
try convert("a|b|c|d")
5451
try convert("a|🧟‍♀️\u{301}日|z")
5552

56-
print("Simple quantifications")
53+
// print("Simple quantifications")
5754
try convert("a*")
5855
try convert("a+")
5956
try convert("a?")
6057
try convert("a{2,10}")
6158
try convert("a{,10}")
6259
try convert("a{2,}")
6360

64-
print("Grouping")
61+
// print("Grouping")
6562
try convert("a(b|c)d")
6663
try convert("a(bcd|def(g|h)+)z")
6764

68-
print("Dot")
65+
// print("Dot")
6966
// try convert(".*")
7067
// try convert("(a|b)*.{3}(a|b)")
7168

7269

73-
print("[Done]")
70+
// print("[Done]")
7471
}
7572
try convert(pattern)
7673

Sources/_RegexParser/Regex/BNF/BNFConvert.swift

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,136 @@ extension BNFConvert {
291291
}
292292
}
293293

294+
extension BNFConvert {
295+
// TODO: I just want a use-def chain
/// Computes a use graph over the grammar's non-terminal symbols.
///
/// Not implemented yet: calling this traps unconditionally.
///
/// NOTE(review): the signature does not settle the direction of the mapping
/// (symbol -> symbols it uses, or symbol -> symbols that use it); decide and
/// document that when implementing.
func calculateUseGraph() -> [NonTerminalSymbol: [NonTerminalSymbol]] {
  // Trap with a message so a stray call is diagnosable from the crash log.
  fatalError("Unimplemented: calculateUseGraph()")
}
299+
300+
301+
302+
/// Optimize the BNF.
///
/// Repeats two rewrites until neither changes the grammar:
///
/// 1. Value propagation: every use of a non-terminal whose production is a
///    single choice consisting of a single symbol is replaced by that
///    symbol. Terminal sequences longer than a small threshold are not
///    propagated.
/// 2. Root collapse: if the root's production is exactly one choice holding
///    exactly one non-terminal, the root takes over that non-terminal's
///    production. The root has no uses, so step 1 can never do this for it.
///
/// Traps if `root` is unset, if the root has no production, or if the root
/// refers to a non-terminal that has no production.
mutating func optimize() {
  // Single-symbol productions whose terminal sequence is longer than this
  // are left in place rather than copied into every use.
  let terminalSequenceThreshold = 3

  // Iterate until we reach a fixed point
  var changed = true
  while changed {
    changed = false

    //
    // Value propagation: propagate small single-choice single-symbol
    // productions
    //
    //   A ::= B C D E
    //   B ::= "b"
    //   C ::= C2
    //   C2 ::= "c"
    //   D ::= "d" "d" "d"
    //   E ::= "e" "e" "e" "e" ...
    //
    // -->
    //
    //   A ::= "b" "c" "d" "d" "d" E
    //   E ::= "e" "e" "e" "e" ...
    //

    // Build up a list of single-choice single-symbol productions
    // for upwards propagation
    var singles = [NonTerminalSymbol: Symbol]()
    for (key, val) in productions {
      guard val.count == 1,
            let onlyChoice = val.first,
            onlyChoice.sequence.count == 1,
            let onlySym = onlyChoice.sequence.first
      else { continue }

      if case .terminalSequence(let array) = onlySym,
         array.count > terminalSequenceThreshold {
        continue
      }

      // `A ::= A` would substitute A for A and report a change on every
      // pass, so the loop would never reach a fixed point.
      // NOTE(review): reference cycles of three or more productions
      // (A ::= B, B ::= C, C ::= A) are not detected here.
      if case .nonTerminal(let target) = onlySym, target == key {
        continue
      }

      singles[key] = onlySym
    }

    // Substitute the collected symbols at every use. The loop walks the
    // snapshot of `productions` taken when iteration starts, so writing
    // back into the dictionary inside the loop is safe.
    for (key, val) in productions {
      var updated = val
      var didUpdate = false

      for choiceIdx in val.indices {
        for idx in val[choiceIdx].sequence.indices {
          guard case .nonTerminal(let nt) = val[choiceIdx].sequence[idx],
                let sym = singles[nt]
          else { continue }
          updated[choiceIdx].sequence[idx] = sym
          didUpdate = true
        }
      }

      if didUpdate {
        productions[key] = updated
        changed = true
      }
    }

    // TODO: I think the below is unnecessary, since that would have
    // upwards propagated for everyone except root.
    //
    //      // Check for a simple layer of redirection:
    //      //
    //      //   A ::= B
    //      //   B ::= ...
    //      //
    //      // -->
    //      //
    //      //   A ::= ...
    //      for (key, val) in productions {
    //        if val.count == 1 {
    //          let valChoice = val.first!
    //          if valChoice.sequence.count == 1 {
    //            let valSym = valChoice.sequence.first!
    //            if case .nonTerminal(let rhs) = valSym {
    //              guard let rhsProd = productions[rhs] else {
    //                fatalError("Invariant violated: Unknown production")
    //              }
    //              productions[key] = rhsProd
    //              changed = true
    //            }
    //          }
    //        }
    //      }

    // Check ROOT, since it has no uses it couldn't upward propagate
    // a single non-terminal child
    guard let rootSymbol = root else {
      fatalError("Invariant violated: no root set")
    }
    guard let val = productions[rootSymbol] else {
      // TODO: or is this an empty grammar?
      // TODO: test empty regex
      fatalError("Invariant violated: root has no production")
    }

    // Collapse only when the root is exactly `ROOT ::= X`. Checking just the
    // first symbol would turn `ROOT ::= X Y` into X's production and drop Y,
    // and would crash on an empty sequence. `ROOT ::= ROOT` is skipped
    // because "collapsing" it changes nothing and would loop forever.
    //
    // TODO: This isn't a win when RHS already has uses
    if val.count == 1,
       let onlyChoice = val.first,
       onlyChoice.sequence.count == 1,
       case .nonTerminal(let rhs)? = onlyChoice.sequence.first,
       rhs != rootSymbol {
      // Assigning a missing (nil) production would silently delete ROOT.
      guard let rhsProd = productions[rhs] else {
        fatalError("Invariant violated: Unknown production")
      }
      productions[rootSymbol] = rhsProd
      changed = true
    }
  }
}
}
423+
}
294424

295425

296426
extension BNFConvert {

Sources/_RegexParser/Regex/Printing/PrintAsBNF.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ func convert(_ ast: AST) throws -> BNF {
2121
var converter = BNFConvert()
2222
let rhs = try converter.convert(ast.root)
2323
converter.root = converter.createProduction("ROOT", rhs)
24+
converter.optimize()
2425
return converter.createBNF()
2526
}
2627

Tests/RegexTests/BNFTests.swift

Whitespace-only changes.

0 commit comments

Comments
 (0)