Skip to content

Commit 362b266

Browse files
committed
Initial refactor of lexing.
Extended lexer.Definition to support directly lexing strings and []byte slices. Removed the ebnf and regex lexers. Added an adapter for v0 lexers.
1 parent 2403858 commit 362b266

35 files changed

+202
-1875
lines changed

_examples/basic/main.go

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,30 +8,25 @@ import (
88

99
"github.com/alecthomas/participle"
1010
"github.com/alecthomas/participle/lexer"
11-
"github.com/alecthomas/participle/lexer/ebnf"
11+
"github.com/alecthomas/participle/lexer/stateful"
1212
)
1313

1414
var (
15-
basicLexer = lexer.Must(ebnf.New(`
16-
Comment = ("REM" | "rem" ) { "\u0000"…"\uffff"-"\n"-"\r" } .
17-
Ident = (alpha | "_") { "_" | alpha | digit } .
18-
String = "\"" { "\u0000"…"\uffff"-"\""-"\\" | "\\" any } "\"" .
19-
Number = [ "-" | "+" ] ("." | digit) { "." | digit } .
20-
Punct = "!"…"/" | ":"…"@" | "["…` + "\"`\"" + ` | "{"…"~" .
21-
EOL = ( "\n" | "\r" ) { "\n" | "\r" }.
22-
Whitespace = ( " " | "\t" ) { " " | "\t" } .
23-
24-
alpha = "a"…"z" | "A"…"Z" .
25-
digit = "0"…"9" .
26-
any = "\u0000"…"\uffff" .
27-
`))
15+
basicLexer = lexer.Must(stateful.NewSimple([]stateful.Rule{
16+
{"Comment", `(?i)rem[^\n]*`, nil},
17+
{"String", `"(\\"|[^"])*"`, nil},
18+
{"Number", `[-+]?(\d*\.)?\d+`, nil},
19+
{"Ident", `[a-zA-Z_]\w*`, nil},
20+
{"Punct", `[-[!@#$%^&*()+_={}\|:;"'<,>.?/]|]`, nil},
21+
{"EOL", `[\n\r]+`, nil},
22+
{"whitespace", `[ \t]+`, nil},
23+
}))
2824

2925
basicParser = participle.MustBuild(&Program{},
3026
participle.Lexer(basicLexer),
3127
participle.CaseInsensitive("Ident"),
3228
participle.Unquote("String"),
3329
participle.UseLookahead(2),
34-
participle.Elide("Whitespace"),
3530
)
3631

3732
cli struct {

_examples/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ go 1.14
44

55
require (
66
github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2
7-
github.com/alecthomas/kong v0.2.8
7+
github.com/alecthomas/kong v0.2.11
88
github.com/alecthomas/participle v0.4.1
99
github.com/alecthomas/repr v0.0.0-20200325044227-4184120f674c
1010
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect

_examples/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2/go.mod h1:CxC
33
github.com/alecthomas/kong v0.2.1/go.mod h1:+inYUSluD+p4L8KdviBSgzcqEjUQOfC5fQDRFuc36lI=
44
github.com/alecthomas/kong v0.2.8 h1:VSWWkD1TZij2967FcfVwgRwlp3khCA0liZIkUI9hTdU=
55
github.com/alecthomas/kong v0.2.8/go.mod h1:kQOmtJgV+Lb4aj+I2LEn40cbtawdWJ9Y8QLq+lElKxE=
6+
github.com/alecthomas/kong v0.2.11 h1:RKeJXXWfg9N47RYfMm0+igkxBCTF4bzbneAxaqid0c4=
7+
github.com/alecthomas/kong v0.2.11/go.mod h1:kQOmtJgV+Lb4aj+I2LEn40cbtawdWJ9Y8QLq+lElKxE=
68
github.com/alecthomas/participle v0.4.1 h1:P2PJWzwrSpuCWXKnzqvw0b0phSfH1kJo4p2HvLynVsI=
79
github.com/alecthomas/participle v0.4.1/go.mod h1:T8u4bQOSMwrkTWOSyt8/jSFPEnRtd0FKFMjVfYBlqPs=
810
github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ=

_examples/graphql/main.go

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import (
99

1010
"github.com/alecthomas/participle"
1111
"github.com/alecthomas/participle/lexer"
12-
"github.com/alecthomas/participle/lexer/ebnf"
12+
"github.com/alecthomas/participle/lexer/stateful"
1313
)
1414

1515
type File struct {
@@ -62,17 +62,13 @@ type Value struct {
6262
}
6363

6464
var (
65-
graphQLLexer = lexer.Must(ebnf.New(`
66-
Comment = ("#" | "//") { "\u0000"…"\uffff"-"\n" } .
67-
Ident = (alpha | "_") { "_" | alpha | digit } .
68-
Number = ("." | digit) {"." | digit} .
69-
Whitespace = " " | "\t" | "\n" | "\r" .
70-
Punct = "!"…"/" | ":"…"@" | "["…` + "\"`\"" + ` | "{"…"~" .
71-
72-
alpha = "a"…"z" | "A"…"Z" .
73-
digit = "0"…"9" .
74-
`))
75-
65+
graphQLLexer = lexer.Must(stateful.NewSimple([]stateful.Rule{
66+
{"Comment", `(?:#|//)[^\n]*\n?`, nil},
67+
{"Ident", `[a-zA-Z]\w*`, nil},
68+
{"Number", `(?:\d*\.)?\d+`, nil},
69+
{"Punct", `[-[!@#$%^&*()+_={}\|:;"'<,>.?/]|]`, nil},
70+
{"Whitespace", `[ \t\n\r]+`, nil},
71+
}))
7672
parser = participle.MustBuild(&File{},
7773
participle.Lexer(graphQLLexer),
7874
participle.Elide("Comment", "Whitespace"),

_examples/ini/main.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,21 @@ import (
55

66
"github.com/alecthomas/participle"
77
"github.com/alecthomas/participle/lexer"
8+
"github.com/alecthomas/participle/lexer/stateful"
9+
810
"github.com/alecthomas/repr"
911
)
1012

1113
// A custom lexer for INI files. This illustrates a relatively complex Regexp lexer, as well
1214
// as use of the Unquote filter, which unquotes string tokens.
13-
var iniLexer = lexer.Must(lexer.Regexp(
14-
`(?m)` +
15-
`(\s+)` +
16-
`|(^[#;].*$)` +
17-
`|(?P<Ident>[a-zA-Z][a-zA-Z_\d]*)` +
18-
`|(?P<String>"(?:\\.|[^"])*")` +
19-
`|(?P<Float>\d+(?:\.\d+)?)` +
20-
`|(?P<Punct>[][=])`,
21-
))
15+
var iniLexer = lexer.Must(stateful.NewSimple([]stateful.Rule{
16+
{`Ident`, `[a-zA-Z][a-zA-Z_\d]*`, nil},
17+
{`String`, `"(?:\\.|[^"])*"`, nil},
18+
{`Float`, `\d+(?:\.\d+)?`, nil},
19+
{`Punct`, `[][=]`, nil},
20+
{"comment", `[#;][^\n]*`, nil},
21+
{"whitespace", `\s+`, nil},
22+
}))
2223

2324
type INI struct {
2425
Properties []*Property `@@*`

_examples/sql/main.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@ package main
33

44
import (
55
"github.com/alecthomas/kong"
6+
67
"github.com/alecthomas/participle"
78
"github.com/alecthomas/participle/lexer"
9+
"github.com/alecthomas/participle/lexer/stateful"
10+
811
"github.com/alecthomas/repr"
912
)
1013

@@ -156,13 +159,14 @@ var (
156159
SQL string `arg:"" required:"" help:"SQL to parse."`
157160
}
158161

159-
sqlLexer = lexer.Must(lexer.Regexp(`(\s+)` +
160-
`|(?P<Keyword>(?i)SELECT|FROM|TOP|DISTINCT|ALL|WHERE|GROUP|BY|HAVING|UNION|MINUS|EXCEPT|INTERSECT|ORDER|LIMIT|OFFSET|TRUE|FALSE|NULL|IS|NOT|ANY|SOME|BETWEEN|AND|OR|LIKE|AS|IN)` +
161-
`|(?P<Ident>[a-zA-Z_][a-zA-Z0-9_]*)` +
162-
`|(?P<Number>[-+]?\d*\.?\d+([eE][-+]?\d+)?)` +
163-
`|(?P<String>'[^']*'|"[^"]*")` +
164-
`|(?P<Operators><>|!=|<=|>=|[-+*/%,.()=<>])`,
165-
))
162+
sqlLexer = lexer.Must(stateful.NewSimple([]stateful.Rule{
163+
{`Keyword`, `(?i)SELECT|FROM|TOP|DISTINCT|ALL|WHERE|GROUP|BY|HAVING|UNION|MINUS|EXCEPT|INTERSECT|ORDER|LIMIT|OFFSET|TRUE|FALSE|NULL|IS|NOT|ANY|SOME|BETWEEN|AND|OR|LIKE|AS|IN`, nil},
164+
{`Ident`, `[a-zA-Z_][a-zA-Z0-9_]*`, nil},
165+
{`Number`, `[-+]?\d*\.?\d+([eE][-+]?\d+)?`, nil},
166+
{`String`, `'[^']*'|"[^"]*"`, nil},
167+
{`Operators`, `<>|!=|<=|>=|[-+*/%,.()=<>]`, nil},
168+
{"whitespace", `\s+`, nil},
169+
}))
166170
sqlParser = participle.MustBuild(
167171
&Select{},
168172
participle.Lexer(sqlLexer),

_examples/toml/main.go

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ import (
44
"os"
55

66
"github.com/alecthomas/kong"
7+
78
"github.com/alecthomas/participle"
89
"github.com/alecthomas/participle/lexer"
9-
"github.com/alecthomas/participle/lexer/ebnf"
10+
"github.com/alecthomas/participle/lexer/stateful"
11+
1012
"github.com/alecthomas/repr"
1113
)
1214

@@ -32,8 +34,7 @@ type Value struct {
3234
Date *string `| @Date`
3335
Time *string `| @Time`
3436
Bool *bool `| (@"true" | "false")`
35-
Integer *int64 `| @Int`
36-
Float *float64 `| @Float`
37+
Number *float64 `| @Number`
3738
List []*Value `| "[" [ @@ { "," @@ } ] "]"`
3839
}
3940

@@ -43,28 +44,22 @@ type Section struct {
4344
}
4445

4546
var (
46-
tomlLexer = lexer.Must(ebnf.New(`
47-
Comment = "#" { "\u0000"…"\uffff"-"\n" } .
48-
DateTime = date "T" time [ "-" digit digit ":" digit digit ].
49-
Date = date .
50-
Time = time .
51-
Ident = (alpha | "_") { "_" | alpha | digit } .
52-
String = "\"" { "\u0000"…"\uffff"-"\""-"\\" | "\\" any } "\"" .
53-
Int = [ "-" | "+" ] digit { digit } .
54-
Float = ("." | digit) {"." | digit} .
55-
Punct = "!"…"/" | ":"…"@" | "["…` + "\"`\"" + ` | "{"…"~" .
56-
Whitespace = " " | "\t" | "\n" | "\r" .
57-
58-
alpha = "a"…"z" | "A"…"Z" .
59-
digit = "0"…"9" .
60-
any = "\u0000"…"\uffff" .
61-
date = digit digit digit digit "-" digit digit "-" digit digit .
62-
time = digit digit ":" digit digit ":" digit digit [ "." { digit } ] .
63-
`))
47+
tomlLexer = lexer.Must(stateful.NewSimple([]stateful.Rule{
48+
{"DateTime", `\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?(-\d\d:\d\d)?`, nil},
49+
{"Date", `\d\d\d\d-\d\d-\d\d`, nil},
50+
{"Time", `\d\d:\d\d:\d\d(\.\d+)?`, nil},
51+
{"Ident", `[a-zA-Z_][a-zA-Z_0-9]*`, nil},
52+
{"String", `"[^"]*"`, nil},
53+
{"Number", `[-+]?[.0-9]+\b`, nil},
54+
{"Punct", `\[|]|[-!()+/*=,]`, nil},
55+
{"comment", `#[^\n]+`, nil},
56+
{"whitespace", `\s+`, nil},
57+
}))
6458
tomlParser = participle.MustBuild(&TOML{},
65-
participle.Lexer(tomlLexer),
59+
participle.Lexer(
60+
tomlLexer,
61+
),
6662
participle.Unquote("String"),
67-
participle.Elide("Whitespace", "Comment"),
6863
)
6964

7065
cli struct {

ebnf_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package participle
1+
package participle_test
22

33
import (
44
"strings"

error_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
package participle
1+
package participle_test
22

33
import (
44
"testing"
55

66
"github.com/stretchr/testify/assert"
7+
8+
"github.com/alecthomas/participle"
79
)
810

911
func TestErrorReporting(t *testing.T) {
@@ -22,7 +24,7 @@ func TestErrorReporting(t *testing.T) {
2224
type grammar struct {
2325
Decls []*decl `( @@ ";" )*`
2426
}
25-
p := mustTestParser(t, &grammar{}, UseLookahead(5))
27+
p := mustTestParser(t, &grammar{}, participle.UseLookahead(5))
2628

2729
var err error
2830
ast := &grammar{}

lexer/adapters.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package lexer
2+
3+
import (
4+
"bytes"
5+
"io"
6+
"strings"
7+
)
8+
9+
type legacy struct {
10+
legacy interface {
11+
Lex(io.Reader) (Lexer, error)
12+
Symbols() map[string]rune
13+
}
14+
}
15+
16+
func (l legacy) LexReader(r io.Reader) (Lexer, error) { return l.legacy.Lex(r) }
17+
func (l legacy) LexString(s string) (Lexer, error) { return l.legacy.Lex(strings.NewReader(s)) }
18+
func (l legacy) LexBytes(b []byte) (Lexer, error) { return l.legacy.Lex(bytes.NewReader(b)) }
19+
func (l legacy) Symbols() map[string]rune { return l.legacy.Symbols() }
20+
21+
// Legacy is a shim for Participle v0 lexer definitions.
22+
func Legacy(def interface {
23+
Lex(io.Reader) (Lexer, error)
24+
Symbols() map[string]rune
25+
}) Definition {
26+
return legacy{def}
27+
}
28+
29+
// Simple upgrades a lexer that only implements LexReader() by using
30+
// strings/bytes.NewReader().
31+
func Simple(def interface {
32+
Symbols() map[string]rune
33+
LexReader(io.Reader) (Lexer, error)
34+
}) Definition {
35+
return simple{def}
36+
}
37+
38+
type simplei interface {
39+
Symbols() map[string]rune
40+
LexReader(io.Reader) (Lexer, error)
41+
}
42+
43+
type simple struct{ simplei }
44+
45+
func (s simple) LexString(str string) (Lexer, error) { return s.LexReader(strings.NewReader(str)) }
46+
func (s simple) LexBytes(b []byte) (Lexer, error) { return s.LexReader(bytes.NewReader(b)) }

0 commit comments

Comments
 (0)