From cdb6e1e15f691599b15a4d474952f38c7381a964 Mon Sep 17 00:00:00 2001 From: Carol Nichols Date: Tue, 5 May 2015 19:26:16 -0400 Subject: [PATCH 1/3] Correct a typo in a declared token in the reference grammar This does not appear to have much of a detrimental effect, but it doesn't seem to be what is intended either. ANTLR doesn't mind that `PLUS` isn't declared in `tokens` and happily uses the `PLUS` that appears later in the file, but the generated RustLexer.tokens had PLUS at the end rather than where it was intended: NOT=10 TILDE=11 PLUT=12 MINUS=13 ... PLUS=56 --- src/grammar/RustLexer.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index 3d8f3aeb28fa7..5dcf0f6935f22 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -8,7 +8,7 @@ lexer grammar RustLexer; tokens { - EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUT, + EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS, MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP, BINOPEQ, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON, MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET, From 1ca9ed61d62b529aa403229b0a23dac69a091fe3 Mon Sep 17 00:00:00 2001 From: Carol Nichols Date: Tue, 5 May 2015 19:29:58 -0400 Subject: [PATCH 2/3] Declare other tokens used later in the reference grammar There were some tokens used in the grammar but not declared. ANTLR doesn't really seem to care and happily uses them, but they appear in RustLexer.tokens in a potentially-unexpected order. 
--- src/grammar/RustLexer.g4 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index 5dcf0f6935f22..f062d33f25e25 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -12,10 +12,10 @@ tokens { MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP, BINOPEQ, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON, MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET, - LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, + LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_BYTE, LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY, - LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT, - COMMENT, SHEBANG + LIT_BINARY_RAW, QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT, + COMMENT, SHEBANG, UTF8_BOM } import xidstart , xidcontinue; From 9c7d5ae57c27ebfc019c2c23283bb905d8c3b74f Mon Sep 17 00:00:00 2001 From: Carol Nichols Date: Tue, 5 May 2015 19:32:26 -0400 Subject: [PATCH 3/3] Panic if the grammar verifier sees a token it doesn't recognize To prevent the reference grammar from getting out of sync with the real grammar, panic if RustLexer.tokens contains an unknown token in a similar way that verify.rs panics if it encounters an unknown binary operation token. --- src/grammar/verify.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index dec797747c270..10b8abfc78606 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -111,7 +111,7 @@ fn parse_token_list(file: &str) -> HashMap { "LIT_BINARY_RAW" => token::Literal(token::BinaryRaw(Name(0), 0), None), "QUESTION" => token::Question, "SHEBANG" => token::Shebang(Name(0)), - _ => continue, + _ => panic!("Bad token str `{}`", val), }; res.insert(num.to_string(), tok);