Rewrite macro_rules! parser to not use the MBE engine itself

joshtriplett · joshtriplett · commit 01f38b4755d2 · 2025-06-26T15:14:27.000-07:00
The `macro_rules!` parser was written to match the series of rules using
the macros-by-example (MBE) engine and a hand-written equivalent of the
left-hand side of a MBE macro. This was complex to read, difficult to
extend, and produced confusing error messages. Because it was using the
MBE engine, any parse failure would be reported as if some macro was
being applied to the `macro_rules!` invocation itself; for instance,
errors would talk about "macro invocation", "macro arguments", and
"macro call", when they were actually about the macro *definition*.

And in practice, the `macro_rules!` parser only used the MBE engine to
extract the left-hand side and right-hand side of each rule as a token
tree, and then parsed the rest using a separate parser.

Rewrite it to parse the series of rules using a simple loop, instead.
This makes it more extensible in the future, and improves error
messages. For instance, omitting a semicolon between rules will result
in "expected `;`" and "unexpected token", rather than the confusing "no
rules expected this token in macro call".

This work was greatly aided by pair programming with Vincenzo Palazzo
and Eric Holk.
diff --git a/compiler/rustc_expand/src/mbe/diagnostics.rs b/compiler/rustc_expand/src/mbe/diagnostics.rs
@@ -194,38 +194,6 @@ impl<'dcx> CollectTrackerAndEmitter<'dcx, '_> {
     }
 }
 
-/// Currently used by macro_rules! compilation to extract a little information from the `Failure`
-/// case.
-pub(crate) struct FailureForwarder<'matcher> {
-    expected_token: Option<&'matcher Token>,
-}
-
-impl<'matcher> FailureForwarder<'matcher> {
-    pub(crate) fn new() -> Self {
-        Self { expected_token: None }
-    }
-}
-
-impl<'matcher> Tracker<'matcher> for FailureForwarder<'matcher> {
-    type Failure = (Token, u32, &'static str);
-
-    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
-        (tok, position, msg)
-    }
-
-    fn description() -> &'static str {
-        "failure-forwarder"
-    }
-
-    fn set_expected_token(&mut self, tok: &'matcher Token) {
-        self.expected_token = Some(tok);
-    }
-
-    fn get_expected_token(&self) -> Option<&'matcher Token> {
-        self.expected_token
-    }
-}
-
 pub(super) fn emit_frag_parse_err(
     mut e: Diag<'_>,
     parser: &Parser<'_>,
@@ -320,7 +288,7 @@ enum ExplainDocComment {
     },
 }
 
-pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
+fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
     if let Ok(src) = sm.span_to_snippet(span) {
         if src.starts_with("///") || src.starts_with("/**") {
             err.subdiagnostic(ExplainDocComment::Outer { span });
@@ -332,7 +300,7 @@ pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Spa
 
 /// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
 /// other tokens, this is "unexpected token...".
-pub(super) fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
+fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
     if let Some(expected_token) = expected_token {
         Cow::from(format!("expected {}, found {}", token_descr(expected_token), token_descr(tok)))
     } else {
diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -541,8 +541,6 @@ impl TtParser {
                         // The separator matches the current token. Advance past it.
                         mp.idx += 1;
                         self.next_mps.push(mp);
-                    } else {
-                        track.set_expected_token(separator);
                     }
                 }
                 &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -19,12 +19,13 @@ use rustc_lint_defs::BuiltinLintDiag;
 use rustc_lint_defs::builtin::{
     RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
 };
-use rustc_parse::parser::{ParseNtResult, Parser, Recovery};
+use rustc_parse::exp;
+use rustc_parse::parser::{Parser, Recovery};
 use rustc_session::Session;
 use rustc_session::parse::ParseSess;
 use rustc_span::edition::Edition;
 use rustc_span::hygiene::Transparency;
-use rustc_span::{Ident, MacroRulesNormalizedIdent, Span, kw, sym};
+use rustc_span::{Ident, Span, kw, sym};
 use tracing::{debug, instrument, trace, trace_span};
 
 use super::macro_parser::{NamedMatches, NamedParseResult};
@@ -34,8 +35,6 @@ use crate::base::{
     SyntaxExtensionKind, TTMacroExpander,
 };
 use crate::expand::{AstFragment, AstFragmentKind, ensure_complete_parse, parse_ast_fragment};
-use crate::mbe::diagnostics::{annotate_doc_comment, parse_failure_msg};
-use crate::mbe::macro_parser::NamedMatch::*;
 use crate::mbe::macro_parser::{Error, ErrorReported, Failure, MatcherLoc, Success, TtParser};
 use crate::mbe::transcribe::transcribe;
 use crate::mbe::{self, KleeneOp, macro_check};
@@ -168,11 +167,6 @@ pub(super) trait Tracker<'matcher> {
     fn recovery() -> Recovery {
         Recovery::Forbidden
     }
-
-    fn set_expected_token(&mut self, _tok: &'matcher Token) {}
-    fn get_expected_token(&self) -> Option<&'matcher Token> {
-        None
-    }
 }
 
 /// A noop tracker that is used in the hot path of the expansion, has zero overhead thanks to
@@ -360,11 +354,6 @@ pub(super) fn try_match_macro<'matcher, T: Tracker<'matcher>>(
     Err(CanRetry::Yes)
 }
 
-// Note that macro-by-example's input is also matched against a token tree:
-//                   $( $lhs:tt => $rhs:tt );+
-//
-// Holy self-referential!
-
 /// Converts a macro item into a syntax extension.
 pub fn compile_declarative_macro(
     sess: &Session,
@@ -390,152 +379,63 @@ pub fn compile_declarative_macro(
     };
     let dummy_syn_ext = |guar| (mk_syn_ext(Arc::new(DummyExpander(guar))), Vec::new());
 
-    let lhs_nm = Ident::new(sym::lhs, span);
-    let rhs_nm = Ident::new(sym::rhs, span);
-    let tt_spec = Some(NonterminalKind::TT);
     let macro_rules = macro_def.macro_rules;
+    let exp_sep = if macro_rules { exp!(Semi) } else { exp!(Comma) };
 
-    // Parse the macro_rules! invocation
-
-    // The pattern that macro_rules matches.
-    // The grammar for macro_rules! is:
-    // $( $lhs:tt => $rhs:tt );+
-    // ...quasiquoting this would be nice.
-    // These spans won't matter, anyways
-    let argument_gram = vec![
-        mbe::TokenTree::Sequence(
-            DelimSpan::dummy(),
-            mbe::SequenceRepetition {
-                tts: vec![
-                    mbe::TokenTree::MetaVarDecl(span, lhs_nm, tt_spec),
-                    mbe::TokenTree::token(token::FatArrow, span),
-                    mbe::TokenTree::MetaVarDecl(span, rhs_nm, tt_spec),
-                ],
-                separator: Some(Token::new(
-                    if macro_rules { token::Semi } else { token::Comma },
-                    span,
-                )),
-                kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, span),
-                num_captures: 2,
-            },
-        ),
-        // to phase into semicolon-termination instead of semicolon-separation
-        mbe::TokenTree::Sequence(
-            DelimSpan::dummy(),
-            mbe::SequenceRepetition {
-                tts: vec![mbe::TokenTree::token(
-                    if macro_rules { token::Semi } else { token::Comma },
-                    span,
-                )],
-                separator: None,
-                kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, span),
-                num_captures: 0,
-            },
-        ),
-    ];
-    // Convert it into `MatcherLoc` form.
-    let argument_gram = mbe::macro_parser::compute_locs(&argument_gram);
-
-    let create_parser = || {
-        let body = macro_def.body.tokens.clone();
-        Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS)
-    };
-
-    let parser = create_parser();
-    let mut tt_parser =
-        TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro }));
-    let argument_map =
-        match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
-            Success(m) => m,
-            Failure(()) => {
-                // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
-                // with another one that gives us the information we need.
-                // For this we need to reclone the macro body as the previous parser consumed it.
-                let retry_parser = create_parser();
-
-                let mut track = diagnostics::FailureForwarder::new();
-                let parse_result =
-                    tt_parser.parse_tt(&mut Cow::Owned(retry_parser), &argument_gram, &mut track);
-                let Failure((token, _, msg)) = parse_result else {
-                    unreachable!("matcher returned something other than Failure after retry");
-                };
-
-                let s = parse_failure_msg(&token, track.get_expected_token());
-                let sp = token.span.substitute_dummy(span);
-                let mut err = sess.dcx().struct_span_err(sp, s);
-                err.span_label(sp, msg);
-                annotate_doc_comment(&mut err, sess.source_map(), sp);
-                let guar = err.emit();
-                return dummy_syn_ext(guar);
-            }
-            Error(sp, msg) => {
-                let guar = sess.dcx().span_err(sp.substitute_dummy(span), msg);
-                return dummy_syn_ext(guar);
-            }
-            ErrorReported(guar) => {
-                return dummy_syn_ext(guar);
-            }
-        };
+    let body = macro_def.body.tokens.clone();
+    let mut p = Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS);
 
+    // Don't abort iteration early, so that multiple errors can be reported.
     let mut guar = None;
     let mut check_emission = |ret: Result<(), ErrorGuaranteed>| guar = guar.or(ret.err());
 
-    // Extract the arguments:
-    let lhses = match &argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] {
-        MatchedSeq(s) => s
-            .iter()
-            .map(|m| {
-                if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
-                    let tt = mbe::quoted::parse(
-                        &TokenStream::new(vec![tt.clone()]),
-                        true,
-                        sess,
-                        node_id,
-                        features,
-                        edition,
-                    )
-                    .pop()
-                    .unwrap();
-                    // We don't handle errors here, the driver will abort
-                    // after parsing/expansion. We can report every error in every macro this way.
-                    check_emission(check_lhs_nt_follows(sess, node_id, &tt));
-                    return tt;
-                }
-                sess.dcx().span_bug(span, "wrong-structured lhs")
-            })
-            .collect::<Vec<mbe::TokenTree>>(),
-        _ => sess.dcx().span_bug(span, "wrong-structured lhs"),
-    };
-
-    let rhses = match &argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] {
-        MatchedSeq(s) => s
-            .iter()
-            .map(|m| {
-                if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
-                    return mbe::quoted::parse(
-                        &TokenStream::new(vec![tt.clone()]),
-                        false,
-                        sess,
-                        node_id,
-                        features,
-                        edition,
-                    )
-                    .pop()
-                    .unwrap();
-                }
-                sess.dcx().span_bug(span, "wrong-structured rhs")
-            })
-            .collect::<Vec<mbe::TokenTree>>(),
-        _ => sess.dcx().span_bug(span, "wrong-structured rhs"),
-    };
+    let mut lhses = Vec::new();
+    let mut rhses = Vec::new();
 
-    for rhs in &rhses {
-        check_emission(check_rhs(sess, rhs));
+    while p.token != token::Eof {
+        let lhs_tt = p.parse_token_tree();
+        let lhs_tt = mbe::quoted::parse(
+            &TokenStream::new(vec![lhs_tt]),
+            true, // LHS
+            sess,
+            node_id,
+            features,
+            edition,
+        )
+        .pop()
+        .unwrap();
+        // We don't handle errors here, the driver will abort after parsing/expansion. We can
+        // report every error in every macro this way.
+        check_emission(check_lhs_nt_follows(sess, node_id, &lhs_tt));
+        check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(&lhs_tt)));
+        if let Err(e) = p.expect(exp!(FatArrow)) {
+            return dummy_syn_ext(e.emit());
+        }
+        let rhs_tt = p.parse_token_tree();
+        let rhs_tt = mbe::quoted::parse(
+            &TokenStream::new(vec![rhs_tt]),
+            false, // RHS
+            sess,
+            node_id,
+            features,
+            edition,
+        )
+        .pop()
+        .unwrap();
+        check_emission(check_rhs(sess, &rhs_tt));
+        lhses.push(lhs_tt);
+        rhses.push(rhs_tt);
+        if p.token == token::Eof {
+            break;
+        }
+        if let Err(e) = p.expect(exp_sep) {
+            return dummy_syn_ext(e.emit());
+        }
     }
 
-    // Don't abort iteration early, so that errors for multiple lhses can be reported.
-    for lhs in &lhses {
-        check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(lhs)));
+    if lhses.is_empty() {
+        let guar = sess.dcx().span_err(span, "macros must contain at least one rule");
+        return dummy_syn_ext(guar);
     }
 
     check_emission(macro_check::check_meta_variables(&sess.psess, node_id, span, &lhses, &rhses));
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
@@ -1748,7 +1748,6 @@ symbols! {
         resume,
         return_position_impl_trait_in_trait,
         return_type_notation,
-        rhs,
         riscv_target_feature,
         rlib,
         ropi,
diff --git a/tests/ui/attributes/crate-type-macro-empty.rs b/tests/ui/attributes/crate-type-macro-empty.rs
@@ -2,6 +2,6 @@
 #[crate_type = foo!()]
 //~^ ERROR cannot find macro `foo` in this scope
 
-macro_rules! foo {} //~ ERROR unexpected end of macro invocation
+macro_rules! foo {} //~ ERROR macros must contain at least one rule
 
 fn main() {}
diff --git a/tests/ui/attributes/crate-type-macro-empty.stderr b/tests/ui/attributes/crate-type-macro-empty.stderr
@@ -1,8 +1,8 @@
-error: unexpected end of macro invocation
+error: macros must contain at least one rule
   --> $DIR/crate-type-macro-empty.rs:5:1
    |
 LL | macro_rules! foo {}
-   | ^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
+   | ^^^^^^^^^^^^^^^^^^^
 
 error: cannot find macro `foo` in this scope
   --> $DIR/crate-type-macro-empty.rs:2:16
diff --git a/tests/ui/macros/missing-semi.stderr b/tests/ui/macros/missing-semi.stderr
@@ -1,8 +1,10 @@
 error: expected `;`, found `(`
   --> $DIR/missing-semi.rs:6:5
    |
+LL |     }
+   |      - expected `;`
 LL |     () => {
-   |     ^ no rules expected this token in macro call
+   |     ^ unexpected token
 
 error: aborting due to 1 previous error
 
diff --git a/tests/ui/parser/issues/issue-7970b.rs b/tests/ui/parser/issues/issue-7970b.rs
@@ -1,4 +1,4 @@
 fn main() {}
 
 macro_rules! test {}
-//~^ ERROR unexpected end of macro invocation
+//~^ ERROR macros must contain at least one rule
diff --git a/tests/ui/parser/issues/issue-7970b.stderr b/tests/ui/parser/issues/issue-7970b.stderr
@@ -1,8 +1,8 @@
-error: unexpected end of macro invocation
+error: macros must contain at least one rule
   --> $DIR/issue-7970b.rs:3:1
    |
 LL | macro_rules! test {}
-   | ^^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
+   | ^^^^^^^^^^^^^^^^^^^^
 
 error: aborting due to 1 previous error
 
diff --git a/tests/ui/parser/macros-no-semicolon-items.rs b/tests/ui/parser/macros-no-semicolon-items.rs
@@ -1,5 +1,5 @@
 macro_rules! foo()  //~ ERROR semicolon
-                    //~| ERROR unexpected end of macro
+                    //~| ERROR macros must contain at least one rule
 
 macro_rules! bar {
     ($($tokens:tt)*) => {}
diff --git a/tests/ui/parser/macros-no-semicolon-items.stderr b/tests/ui/parser/macros-no-semicolon-items.stderr
@@ -38,11 +38,11 @@ help: add a semicolon
 LL | );
    |  +
 
-error: unexpected end of macro invocation
+error: macros must contain at least one rule
   --> $DIR/macros-no-semicolon-items.rs:1:1
    |
 LL | macro_rules! foo()
-   | ^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
+   | ^^^^^^^^^^^^^^^^^^
 
 error: aborting due to 3 previous errors
 

Original file line number	Diff line number	Diff line change
`@@ -541,8 +541,6 @@ impl TtParser {`
`541`	`541`	`// The separator matches the current token. Advance past it.`
`542`	`542`	`mp.idx += 1;`
`543`	`543`	`self.next_mps.push(mp);`
`544`		`- } else {`
`545`		`- track.set_expected_token(separator);`
`546`	`544`	`}`
`547`	`545`	`}`
`548`	`546`	`&MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {`
Original file line number	Diff line number	Diff line change
`@@ -1,8 +1,8 @@`
`1`		`-error: unexpected end of macro invocation`
	`1`	`+error: macros must contain at least one rule`
`2`	`2`	`--> $DIR/issue-7970b.rs:3:1`
`3`	`3`	`\|`
`4`	`4`	`LL \| macro_rules! test {}`
`5`		`- \| ^^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments`
	`5`	`+ \| ^^^^^^^^^^^^^^^^^^^^`
`6`	`6`
`7`	`7`	`error: aborting due to 1 previous error`
`8`	`8`