From 099e71650277642db453c157b529c961705f36b5 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 19 Mar 2024 16:07:09 +1100 Subject: [PATCH 01/12] Factor out `tt` pushes. --- compiler/rustc_expand/src/mbe/transcribe.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index 8fcd468e34b7a..cd90349e6726f 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -250,26 +250,25 @@ pub(super) fn transcribe<'a>( // the meta-var. let ident = MacroRulesNormalizedIdent::new(original_ident); if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) { - match cur_matched { + let tt = match cur_matched { MatchedTokenTree(tt) => { // `tt`s are emitted into the output stream directly as "raw tokens", // without wrapping them into groups. - let tt = maybe_use_metavar_location(cx, &stack, sp, tt, &mut marker); - result.push(tt); + maybe_use_metavar_location(cx, &stack, sp, tt, &mut marker) } MatchedNonterminal(nt) => { // Other variables are emitted into the output stream as groups with // `Delimiter::Invisible` to maintain parsing priorities. // `Interpolated` is currently used for such groups in rustc parser. marker.visit_span(&mut sp); - result - .push(TokenTree::token_alone(token::Interpolated(nt.clone()), sp)); + TokenTree::token_alone(token::Interpolated(nt.clone()), sp) } MatchedSeq(..) => { // We were unable to descend far enough. This is an error. return Err(cx.dcx().create_err(VarStillRepeating { span: sp, ident })); } - } + }; + result.push(tt) } else { // If we aren't able to match the meta-var, we push it back into the result but // with modified syntax context. (I believe this supports nested macros). From b9ead994b3190f44d2c6910f400c62363bc82cbc Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 06:51:49 +1100 Subject: [PATCH 02/12] Rename `Token::is_path`. This makes it consistent with `is_whole_expr` and `is_whole_block`. --- compiler/rustc_ast/src/token.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index c17020ed6632e..2e9c2d053704f 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -664,7 +664,7 @@ impl Token { } /// Returns `true` if the token is an interpolated path. - fn is_path(&self) -> bool { + fn is_whole_path(&self) -> bool { if let Interpolated(nt) = &self.kind && let NtPath(..) = &nt.0 { @@ -710,7 +710,7 @@ impl Token { pub fn is_path_start(&self) -> bool { self == &ModSep || self.is_qpath_start() - || self.is_path() + || self.is_whole_path() || self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident() } From d919dbe370e28c53498acb13be80fc30097451fa Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 05:55:44 +1100 Subject: [PATCH 03/12] Use `maybe_whole!` to streamline `parse_attr_item`. --- compiler/rustc_parse/src/parser/attr.rs | 29 +++++++++---------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index d08c50b5b0628..b1cb9a3876c88 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -3,11 +3,12 @@ use crate::errors::{ SuffixedLiteralInAttribute, }; use crate::fluent_generated as fluent; +use crate::maybe_whole; use super::{AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, PathStyle}; use rustc_ast as ast; use rustc_ast::attr; -use rustc_ast::token::{self, Delimiter, Nonterminal}; +use rustc_ast::token::{self, Delimiter}; use rustc_errors::{codes::*, Diag, PResult}; use rustc_span::{sym, BytePos, Span}; use thin_vec::ThinVec; @@ -251,25 +252,15 @@ impl<'a> Parser<'a> { /// PATH `=` UNSUFFIXED_LIT /// The delimiters or `=` are still put into the resulting token stream. pub fn parse_attr_item(&mut self, capture_tokens: bool) -> PResult<'a, ast::AttrItem> { - let item = match &self.token.kind { - token::Interpolated(nt) => match &nt.0 { - Nonterminal::NtMeta(item) => Some(item.clone().into_inner()), - _ => None, - }, - _ => None, + maybe_whole!(self, NtMeta, |attr| attr.into_inner()); + + let do_parse = |this: &mut Self| { + let path = this.parse_path(PathStyle::Mod)?; + let args = this.parse_attr_args()?; + Ok(ast::AttrItem { path, args, tokens: None }) }; - Ok(if let Some(item) = item { - self.bump(); - item - } else { - let do_parse = |this: &mut Self| { - let path = this.parse_path(PathStyle::Mod)?; - let args = this.parse_attr_args()?; - Ok(ast::AttrItem { path, args, tokens: None }) - }; - // Attr items don't have attributes - if capture_tokens { self.collect_tokens_no_attrs(do_parse) } else { do_parse(self) }? - }) + // Attr items don't have attributes + if capture_tokens { self.collect_tokens_no_attrs(do_parse) } else { do_parse(self) } } /// Parses attributes that appear after the opening of an item. These should From 8ac16c6193f6b694e5b05ff2209caafac792e760 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 06:47:59 +1100 Subject: [PATCH 04/12] Rewrite `parse_meta_item`. It can't use `maybe_whole`, but it can match `maybe_whole` more closely. Also add a test for a case that wasn't previously covered. --- compiler/rustc_parse/src/parser/attr.rs | 18 ++++++------- tests/ui/parser/attribute/attr-bad-meta-4.rs | 12 +++++++++ .../parser/attribute/attr-bad-meta-4.stderr | 25 +++++++++++++++++++ 3 files changed, 44 insertions(+), 11 deletions(-) create mode 100644 tests/ui/parser/attribute/attr-bad-meta-4.rs create mode 100644 tests/ui/parser/attribute/attr-bad-meta-4.stderr diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index b1cb9a3876c88..ab5f51eedc307 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -362,22 +362,18 @@ impl<'a> Parser<'a> { /// meta_item_inner : (meta_item | UNSUFFIXED_LIT) (',' meta_item_inner)? ; /// ``` pub fn parse_meta_item(&mut self) -> PResult<'a, ast::MetaItem> { - let nt_meta = match &self.token.kind { - token::Interpolated(nt) => match &nt.0 { - token::NtMeta(e) => Some(e.clone()), - _ => None, - }, - _ => None, - }; - - if let Some(item) = nt_meta { - match item.meta(item.path.span) { + // We can't use `maybe_whole` here because it would bump in the `None` + // case, which we don't want. + if let token::Interpolated(nt) = &self.token.kind + && let token::NtMeta(attr_item) = &nt.0 + { + match attr_item.meta(attr_item.path.span) { Some(meta) => { self.bump(); return Ok(meta); } None => self.unexpected()?, - }; + } } let lo = self.token.span; diff --git a/tests/ui/parser/attribute/attr-bad-meta-4.rs b/tests/ui/parser/attribute/attr-bad-meta-4.rs new file mode 100644 index 0000000000000..cedbd1d6686bc --- /dev/null +++ b/tests/ui/parser/attribute/attr-bad-meta-4.rs @@ -0,0 +1,12 @@ +macro_rules! mac { + ($attr_item: meta) => { + #[cfg($attr_item)] + //~^ ERROR expected unsuffixed literal or identifier, found `an(arbitrary token stream)` + //~| ERROR expected unsuffixed literal or identifier, found `an(arbitrary token stream)` + struct S; + } +} + +mac!(an(arbitrary token stream)); + +fn main() {} diff --git a/tests/ui/parser/attribute/attr-bad-meta-4.stderr b/tests/ui/parser/attribute/attr-bad-meta-4.stderr new file mode 100644 index 0000000000000..a543bcb692e60 --- /dev/null +++ b/tests/ui/parser/attribute/attr-bad-meta-4.stderr @@ -0,0 +1,25 @@ +error: expected unsuffixed literal or identifier, found `an(arbitrary token stream)` + --> $DIR/attr-bad-meta-4.rs:3:15 + | +LL | #[cfg($attr_item)] + | ^^^^^^^^^^ +... +LL | mac!(an(arbitrary token stream)); + | -------------------------------- in this macro invocation + | + = note: this error originates in the macro `mac` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: expected unsuffixed literal or identifier, found `an(arbitrary token stream)` + --> $DIR/attr-bad-meta-4.rs:3:15 + | +LL | #[cfg($attr_item)] + | ^^^^^^^^^^ +... +LL | mac!(an(arbitrary token stream)); + | -------------------------------- in this macro invocation + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + = note: this error originates in the macro `mac` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: aborting due to 2 previous errors + From d4ad322b5d38dbdb4aeddbead04dd3e592b374fb Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 07:08:22 +1100 Subject: [PATCH 05/12] Use `maybe_whole!` to streamline `parse_item_common`. This requires changing `maybe_whole!` so it allows the value to be modified. --- compiler/rustc_parse/src/parser/item.rs | 14 ++++---------- compiler/rustc_parse/src/parser/mod.rs | 13 +++++++------ 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index 18d210eacfa61..d54eb8dc4c9eb 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -6,6 +6,7 @@ use super::{ }; use crate::errors::{self, MacroExpandsToAdtField}; use crate::fluent_generated as fluent; +use crate::maybe_whole; use ast::token::IdentIsRaw; use rustc_ast::ast::*; use rustc_ast::ptr::P; @@ -115,17 +116,10 @@ impl<'a> Parser<'a> { fn_parse_mode: FnParseMode, force_collect: ForceCollect, ) -> PResult<'a, Option> { - // Don't use `maybe_whole` so that we have precise control - // over when we bump the parser - if let token::Interpolated(nt) = &self.token.kind - && let token::NtItem(item) = &nt.0 - { - let mut item = item.clone(); - self.bump(); - + maybe_whole!(self, NtItem, |item| { attrs.prepend_to_nt_inner(&mut item.attrs); - return Ok(Some(item.into_inner())); - }; + Some(item.into_inner()) + }); let item = self.collect_tokens_trailing_token(attrs, force_collect, |this: &mut Self, attrs| { diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 125e77d8ee7c4..78233e0f8b5b1 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -93,12 +93,13 @@ pub enum TrailingToken { #[macro_export] macro_rules! maybe_whole { ($p:expr, $constructor:ident, |$x:ident| $e:expr) => { - if let token::Interpolated(nt) = &$p.token.kind { - if let token::$constructor(x) = &nt.0 { - let $x = x.clone(); - $p.bump(); - return Ok($e); - } + if let token::Interpolated(nt) = &$p.token.kind + && let token::$constructor(x) = &nt.0 + { + #[allow(unused_mut)] + let mut $x = x.clone(); + $p.bump(); + return Ok($e); } }; } From 0de050bd6dc1fa39c469eed23775472685b9284d Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 07:13:52 +1100 Subject: [PATCH 06/12] Use `maybe_whole!` to streamline `parse_stmt_without_recovery`. --- compiler/rustc_parse/src/parser/stmt.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index fc907760531f7..7214aca2fb2d6 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -40,8 +40,8 @@ impl<'a> Parser<'a> { })) } - /// If `force_collect` is [`ForceCollect::Yes`], forces collection of tokens regardless of whether - /// or not we have attributes + /// If `force_collect` is [`ForceCollect::Yes`], forces collection of tokens regardless of + /// whether or not we have attributes. // Public for `cfg_eval` macro expansion. pub fn parse_stmt_without_recovery( &mut self, @@ -51,18 +51,12 @@ impl<'a> Parser<'a> { let attrs = self.parse_outer_attributes()?; let lo = self.token.span; - // Don't use `maybe_whole` so that we have precise control - // over when we bump the parser - if let token::Interpolated(nt) = &self.token.kind - && let token::NtStmt(stmt) = &nt.0 - { - let mut stmt = stmt.clone(); - self.bump(); + maybe_whole!(self, NtStmt, |stmt| { stmt.visit_attrs(|stmt_attrs| { attrs.prepend_to_nt_inner(stmt_attrs); }); - return Ok(Some(stmt.into_inner())); - } + Some(stmt.into_inner()) + }); if self.token.is_keyword(kw::Mut) && self.is_keyword_ahead(1, &[kw::Let]) { self.bump(); From 095722214d262981e44abd62b6abb2cc790d51c0 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 07:18:38 +1100 Subject: [PATCH 07/12] Use better variable names in some `maybe_whole!` calls. --- compiler/rustc_parse/src/parser/mod.rs | 2 +- compiler/rustc_parse/src/parser/pat.rs | 2 +- compiler/rustc_parse/src/parser/stmt.rs | 4 ++-- compiler/rustc_parse/src/parser/ty.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 78233e0f8b5b1..290093fe101de 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -1395,7 +1395,7 @@ impl<'a> Parser<'a> { /// so emit a proper diagnostic. // Public for rustfmt usage. pub fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> { - maybe_whole!(self, NtVis, |x| x.into_inner()); + maybe_whole!(self, NtVis, |vis| vis.into_inner()); if !self.eat_keyword(kw::Pub) { // We need a span for our `Spanned`, but there's inherently no diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index 88ae1b5420fe7..cb8d0a89d1707 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -435,7 +435,7 @@ impl<'a> Parser<'a> { syntax_loc: Option, ) -> PResult<'a, P> { maybe_recover_from_interpolated_ty_qpath!(self, true); - maybe_whole!(self, NtPat, |x| x); + maybe_whole!(self, NtPat, |pat| pat); let mut lo = self.token.span; diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index 7214aca2fb2d6..6601011665b41 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -533,7 +533,7 @@ impl<'a> Parser<'a> { blk_mode: BlockCheckMode, can_be_struct_literal: bool, ) -> PResult<'a, (AttrVec, P)> { - maybe_whole!(self, NtBlock, |x| (AttrVec::new(), x)); + maybe_whole!(self, NtBlock, |block| (AttrVec::new(), block)); let maybe_ident = self.prev_token.clone(); self.maybe_recover_unexpected_block_label(); @@ -637,7 +637,7 @@ impl<'a> Parser<'a> { recover: AttemptLocalParseRecovery, ) -> PResult<'a, Option> { // Skip looking for a trailing semicolon when we have an interpolated statement. - maybe_whole!(self, NtStmt, |x| Some(x.into_inner())); + maybe_whole!(self, NtStmt, |stmt| Some(stmt.into_inner())); let Some(mut stmt) = self.parse_stmt_without_recovery(true, ForceCollect::No)? else { return Ok(None); diff --git a/compiler/rustc_parse/src/parser/ty.rs b/compiler/rustc_parse/src/parser/ty.rs index 2385c18c0891c..1cea32cb90fea 100644 --- a/compiler/rustc_parse/src/parser/ty.rs +++ b/compiler/rustc_parse/src/parser/ty.rs @@ -250,7 +250,7 @@ impl<'a> Parser<'a> { ) -> PResult<'a, P> { let allow_qpath_recovery = recover_qpath == RecoverQPath::Yes; maybe_recover_from_interpolated_ty_qpath!(self, allow_qpath_recovery); - maybe_whole!(self, NtTy, |x| x); + maybe_whole!(self, NtTy, |ty| ty); let lo = self.token.span; let mut impl_dyn_multi = false; From c14d9ae23e3b940c757c0775c1fc9ae21c9dcc29 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 07:21:27 +1100 Subject: [PATCH 08/12] Fix some formatting. --- compiler/rustc_parse/src/parser/nonterminal.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index f1572a18a8be0..1cba7107e9a04 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -66,15 +66,14 @@ impl<'a> Parser<'a> { token::Interpolated(nt) => may_be_ident(&nt.0), _ => false, }, - NonterminalKind::PatParam { .. } | NonterminalKind::PatWithOr => { - match &token.kind { + NonterminalKind::PatParam { .. } | NonterminalKind::PatWithOr => match &token.kind { token::Ident(..) | // box, ref, mut, and other identifiers (can stricten) token::OpenDelim(Delimiter::Parenthesis) | // tuple pattern token::OpenDelim(Delimiter::Bracket) | // slice pattern token::BinOp(token::And) | // reference token::BinOp(token::Minus) | // negative literal token::AndAnd | // double reference - token::Literal(_) | // literal + token::Literal(_) | // literal token::DotDot | // range pattern (future compat) token::DotDotDot | // range pattern (future compat) token::ModSep | // path @@ -84,8 +83,7 @@ impl<'a> Parser<'a> { token::BinOp(token::Or) => matches!(kind, NonterminalKind::PatWithOr), token::Interpolated(nt) => may_be_ident(&nt.0), _ => false, - } - } + }, NonterminalKind::Lifetime => match &token.kind { token::Lifetime(_) => true, token::Interpolated(nt) => { From dbed10a6a266b1b99c9f8bdd356b7233f188776c Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 08:04:36 +1100 Subject: [PATCH 09/12] Fix out-of-date comment. --- compiler/rustc_ast/src/token.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 2e9c2d053704f..f49eb2f22c50b 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -105,7 +105,7 @@ impl Lit { } } - /// Keep this in sync with `Token::can_begin_literal_or_bool` excluding unary negation. + /// Keep this in sync with `Token::can_begin_literal_maybe_minus` excluding unary negation. pub fn from_token(token: &Token) -> Option { match token.uninterpolate().kind { Ident(name, IdentIsRaw::No) if name.is_bool_lit() => Some(Lit::new(Bool, name, None)), From b7f3b714da1b9f72ae16f37ff6e2def306e85f69 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 08:48:38 +1100 Subject: [PATCH 10/12] Remove non-useful code path. It has no effect on anything in the test suite. --- compiler/rustc_parse/messages.ftl | 2 -- compiler/rustc_parse/src/errors.rs | 7 ------- compiler/rustc_parse/src/parser/expr.rs | 10 ---------- 3 files changed, 19 deletions(-) diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index a100e2d47bbbb..e1d26f090e0d7 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -390,8 +390,6 @@ parse_invalid_dyn_keyword = invalid `dyn` keyword parse_invalid_expression_in_let_else = a `{$operator}` expression cannot be directly assigned in `let...else` parse_invalid_identifier_with_leading_number = identifiers cannot start with a number -parse_invalid_interpolated_expression = invalid interpolated expression - parse_invalid_literal_suffix_on_tuple_index = suffixes on a tuple index are invalid .label = invalid suffix `{$suffix}` .tuple_exception_line_1 = `{$suffix}` is *temporarily* accepted on tuple index fields as it was incorrectly accepted on stable for a few releases diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 32b56bb7e877e..5cad3a568ee2c 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -850,13 +850,6 @@ pub(crate) struct StructLiteralNotAllowedHereSugg { pub right: Span, } -#[derive(Diagnostic)] -#[diag(parse_invalid_interpolated_expression)] -pub(crate) struct InvalidInterpolatedExpression { - #[primary_span] - pub span: Span, -} - #[derive(Diagnostic)] #[diag(parse_invalid_literal_suffix_on_tuple_index)] pub(crate) struct InvalidLiteralSuffixOnTupleIndex { diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 136145dd1826d..f0f013bad024e 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -2043,16 +2043,6 @@ impl<'a> Parser<'a> { &mut self, mk_lit_char: impl FnOnce(Symbol, Span) -> L, ) -> PResult<'a, L> { - if let token::Interpolated(nt) = &self.token.kind - && let token::NtExpr(e) | token::NtLiteral(e) = &nt.0 - && matches!(e.kind, ExprKind::Err(_)) - { - let mut err = self - .dcx() - .create_err(errors::InvalidInterpolatedExpression { span: self.token.span }); - err.downgrade_to_delayed_bug(); - return Err(err); - } let token = self.token.clone(); let err = |self_: &Self| { let msg = format!("unexpected token: {}", super::token_descr(&token)); From a94bb2a0136b72d177fcd1c49d50227ded195133 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 19 Mar 2024 13:33:33 +1100 Subject: [PATCH 11/12] Streamline `NamedMatch`. This commit combines `MatchedTokenTree` and `MatchedNonterminal`, which are often considered together, into a single `MatchedSingle`. It shares a representation with the newly-parameterized `ParseNtResult`. This will also make things much simpler if/when variants from `Interpolated` start being moved to `ParseNtResult`. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 17 ++++++--------- compiler/rustc_expand/src/mbe/macro_rules.rs | 8 +++---- compiler/rustc_expand/src/mbe/transcribe.rs | 20 +++++++++--------- compiler/rustc_parse/src/parser/mod.rs | 21 +++++++++++++++---- .../rustc_parse/src/parser/nonterminal.rs | 7 +++++-- 5 files changed, 42 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index ac5136539c382..a31be05ccc4d2 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -392,12 +392,7 @@ pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize { #[derive(Debug, Clone)] pub(crate) enum NamedMatch { MatchedSeq(Vec), - - // A metavar match of type `tt`. - MatchedTokenTree(rustc_ast::tokenstream::TokenTree), - - // A metavar match of any type other than `tt`. - MatchedNonterminal(Lrc<(Nonterminal, rustc_span::Span)>), + MatchedSingle(ParseNtResult>), } /// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison) @@ -691,11 +686,11 @@ impl TtParser { } Ok(nt) => nt, }; - let m = match nt { - ParseNtResult::Nt(nt) => MatchedNonterminal(Lrc::new((nt, span))), - ParseNtResult::Tt(tt) => MatchedTokenTree(tt), - }; - mp.push_match(next_metavar, seq_depth, m); + mp.push_match( + next_metavar, + seq_depth, + MatchedSingle(nt.map_nt(|nt| (Lrc::new((nt, span))))), + ); mp.idx += 1; } else { unreachable!() diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index 3f29d7f74659c..7099f1b0d3521 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -5,7 +5,7 @@ use crate::mbe; use crate::mbe::diagnostics::{annotate_doc_comment, parse_failure_msg}; use crate::mbe::macro_check; use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser}; -use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc}; +use crate::mbe::macro_parser::{MatcherLoc, NamedMatch::*}; use crate::mbe::transcribe::transcribe; use ast::token::IdentIsRaw; @@ -22,7 +22,7 @@ use rustc_lint_defs::builtin::{ RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS, }; use rustc_lint_defs::BuiltinLintDiag; -use rustc_parse::parser::{Parser, Recovery}; +use rustc_parse::parser::{ParseNtResult, Parser, Recovery}; use rustc_session::parse::ParseSess; use rustc_session::Session; use rustc_span::edition::Edition; @@ -479,7 +479,7 @@ pub fn compile_declarative_macro( MatchedSeq(s) => s .iter() .map(|m| { - if let MatchedTokenTree(tt) = m { + if let MatchedSingle(ParseNtResult::Tt(tt)) = m { let tt = mbe::quoted::parse( &TokenStream::new(vec![tt.clone()]), true, @@ -505,7 +505,7 @@ pub fn compile_declarative_macro( MatchedSeq(s) => s .iter() .map(|m| { - if let MatchedTokenTree(tt) = m { + if let MatchedSingle(ParseNtResult::Tt(tt)) = m { return mbe::quoted::parse( &TokenStream::new(vec![tt.clone()]), false, diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index cd90349e6726f..dad83984c8b15 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -3,14 +3,14 @@ use crate::errors::{ CountRepetitionMisplaced, MetaVarExprUnrecognizedVar, MetaVarsDifSeqMatchers, MustRepeatOnce, NoSyntaxVarsExprRepeat, VarStillRepeating, }; -use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, MatchedTokenTree, NamedMatch}; +use crate::mbe::macro_parser::{NamedMatch, NamedMatch::*}; use crate::mbe::{self, KleeneOp, MetaVarExpr}; use rustc_ast::mut_visit::{self, MutVisitor}; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree}; use rustc_data_structures::fx::FxHashMap; -use rustc_errors::Diag; -use rustc_errors::{pluralize, PResult}; +use rustc_errors::{pluralize, Diag, PResult}; +use rustc_parse::parser::ParseNtResult; use rustc_span::hygiene::{LocalExpnId, Transparency}; use rustc_span::symbol::{sym, Ident, MacroRulesNormalizedIdent}; use rustc_span::{with_metavar_spans, Span, SyntaxContext}; @@ -251,12 +251,12 @@ pub(super) fn transcribe<'a>( let ident = MacroRulesNormalizedIdent::new(original_ident); if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) { let tt = match cur_matched { - MatchedTokenTree(tt) => { + MatchedSingle(ParseNtResult::Tt(tt)) => { // `tt`s are emitted into the output stream directly as "raw tokens", // without wrapping them into groups. maybe_use_metavar_location(cx, &stack, sp, tt, &mut marker) } - MatchedNonterminal(nt) => { + MatchedSingle(ParseNtResult::Nt(nt)) => { // Other variables are emitted into the output stream as groups with // `Delimiter::Invisible` to maintain parsing priorities. // `Interpolated` is currently used for such groups in rustc parser. @@ -423,7 +423,7 @@ fn lookup_cur_matched<'a>( interpolations.get(&ident).map(|mut matched| { for &(idx, _) in repeats { match matched { - MatchedTokenTree(_) | MatchedNonterminal(_) => break, + MatchedSingle(_) => break, MatchedSeq(ads) => matched = ads.get(idx).unwrap(), } } @@ -513,7 +513,7 @@ fn lockstep_iter_size( let name = MacroRulesNormalizedIdent::new(*name); match lookup_cur_matched(name, interpolations, repeats) { Some(matched) => match matched { - MatchedTokenTree(_) | MatchedNonterminal(_) => LockstepIterSize::Unconstrained, + MatchedSingle(_) => LockstepIterSize::Unconstrained, MatchedSeq(ads) => LockstepIterSize::Constraint(ads.len(), name), }, _ => LockstepIterSize::Unconstrained, @@ -556,7 +556,7 @@ fn count_repetitions<'a>( // (or at the top-level of `matched` if no depth is given). fn count<'a>(depth_curr: usize, depth_max: usize, matched: &NamedMatch) -> PResult<'a, usize> { match matched { - MatchedTokenTree(_) | MatchedNonterminal(_) => Ok(1), + MatchedSingle(_) => Ok(1), MatchedSeq(named_matches) => { if depth_curr == depth_max { Ok(named_matches.len()) @@ -570,7 +570,7 @@ fn count_repetitions<'a>( /// Maximum depth fn depth(counter: usize, matched: &NamedMatch) -> usize { match matched { - MatchedTokenTree(_) | MatchedNonterminal(_) => counter, + MatchedSingle(_) => counter, MatchedSeq(named_matches) => { let rslt = counter + 1; if let Some(elem) = named_matches.first() { depth(rslt, elem) } else { rslt } @@ -598,7 +598,7 @@ fn count_repetitions<'a>( } } - if let MatchedTokenTree(_) | MatchedNonterminal(_) = matched { + if let MatchedSingle(_) = matched { return Err(cx.dcx().create_err(CountRepetitionMisplaced { span: sp.entire() })); } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 290093fe101de..c8b7d56c8566f 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -20,7 +20,7 @@ pub use pat::{CommaRecoveryMode, RecoverColon, RecoverComma}; pub use path::PathStyle; use rustc_ast::ptr::P; -use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind}; +use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::{AttributesData, DelimSpacing, DelimSpan, Spacing}; use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor}; use rustc_ast::util::case::Case; @@ -1572,8 +1572,21 @@ pub enum FlatToken { Empty, } -#[derive(Debug)] -pub enum ParseNtResult { - Nt(Nonterminal), +// Metavar captures of various kinds. +#[derive(Clone, Debug)] +pub enum ParseNtResult { Tt(TokenTree), + Nt(NtType), +} + +impl ParseNtResult { + pub fn map_nt(self, mut f: F) -> ParseNtResult + where + F: FnMut(T) -> U, + { + match self { + ParseNtResult::Tt(tt) => ParseNtResult::Tt(tt), + ParseNtResult::Nt(nt) => ParseNtResult::Nt(f(nt)), + } + } } diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index 1cba7107e9a04..36a00df7b4410 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -1,5 +1,5 @@ use rustc_ast::ptr::P; -use rustc_ast::token::{self, Delimiter, Nonterminal::*, NonterminalKind, Token}; +use rustc_ast::token::{self, Delimiter, Nonterminal, Nonterminal::*, NonterminalKind, Token}; use rustc_ast::HasTokens; use rustc_ast_pretty::pprust; use rustc_errors::PResult; @@ -100,7 +100,10 @@ impl<'a> Parser<'a> { /// Parse a non-terminal (e.g. MBE `:pat` or `:ident`). Inlined because there is only one call /// site. #[inline] - pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, ParseNtResult> { + pub fn parse_nonterminal( + &mut self, + kind: NonterminalKind, + ) -> PResult<'a, ParseNtResult> { // A `macro_rules!` invocation may pass a captured item/expr to a proc-macro, // which requires having captured tokens available. Since we cannot determine // in advance whether or not a proc-macro will be (transitively) invoked, From 82a609f9a6922a5adec4d71bff1f948d868d0384 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Mar 2024 09:47:39 +1100 Subject: [PATCH 12/12] Shrink the comment on `TokenTree`. It uses very old language that is more confusing today than helpful, including references to `SubstNt` that no longer exists. The comment above `TokenStream` is better, and suffices for a basic understanding of these types. --- compiler/rustc_ast/src/tokenstream.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index adc3056cc2921..239735456ad53 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -28,18 +28,7 @@ use smallvec::{smallvec, SmallVec}; use std::borrow::Cow; use std::{cmp, fmt, iter}; -/// When the main Rust parser encounters a syntax-extension invocation, it -/// parses the arguments to the invocation as a token tree. This is a very -/// loose structure, such that all sorts of different AST fragments can -/// be passed to syntax extensions using a uniform type. -/// -/// If the syntax extension is an MBE macro, it will attempt to match its -/// LHS token tree against the provided token tree, and if it finds a -/// match, will transcribe the RHS token tree, splicing in any captured -/// `macro_parser::matched_nonterminals` into the `SubstNt`s it finds. -/// -/// The RHS of an MBE macro is the only place `SubstNt`s are substituted. -/// Nothing special happens to misnamed or misplaced `SubstNt`s. +/// Part of a `TokenStream`. #[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)] pub enum TokenTree { /// A single token. Should never be `OpenDelim` or `CloseDelim`, because