diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 99034799b3c31..16224d71e4569 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -345,17 +345,14 @@ impl Token { } pub fn is_op(&self) -> bool { - !matches!( - self.kind, - OpenDelim(..) - | CloseDelim(..) - | Literal(..) - | DocComment(..) - | Ident(..) - | Lifetime(..) - | Interpolated(..) - | Eof - ) + match self.kind { + Eq | Lt | Le | EqEq | Ne | Ge | Gt | AndAnd | OrOr | Not | Tilde | BinOp(_) + | BinOpEq(_) | At | Dot | DotDot | DotDotDot | DotDotEq | Comma | Semi | Colon + | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question | SingleQuote => true, + + OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | Ident(..) + | Lifetime(..) | Interpolated(..) | Eof => false, + } } pub fn is_like_plus(&self) -> bool { diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 824206a99d8d3..4d2049cbc41ee 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -304,9 +304,20 @@ pub struct AttributesData { #[derive(Clone, Debug, Default, Encodable, Decodable)] pub struct TokenStream(pub(crate) Lrc>); +/// Similar to `proc_macro::Spacing`, but for tokens. +/// +/// Note that all `ast::TokenTree::Token` instances have a `Spacing`, but when +/// we convert to `proc_macro::TokenTree` for proc macros only `Punct` +/// `TokenTree`s have a `proc_macro::Spacing`. #[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)] pub enum Spacing { + /// The token is not immediately followed by an operator token (as + /// determined by `Token::is_op`). E.g. a `+` token is `Alone` in `+ =`, + /// `+/*foo*/=`, `+ident`, and `+()`. Alone, + + /// The token is immediately followed by an operator token. E.g. a `+` + /// token is `Joint` in `+=` and `++`. 
Joint, } diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 59a7b668a83ce..ff09a9fb87a7f 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -110,10 +110,14 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec (token, spacing == Joint), }; + // Split the operator into one or more `Punct`s, one per character. + // The final one inherits the jointness of the original token. Any + // before that get `joint = true`. let mut op = |s: &str| { assert!(s.is_ascii()); - trees.extend(s.as_bytes().iter().enumerate().map(|(idx, &ch)| { - TokenTree::Punct(Punct { ch, joint: joint || idx != s.len() - 1, span }) + trees.extend(s.bytes().enumerate().map(|(idx, ch)| { + let is_final = idx == s.len() - 1; + TokenTree::Punct(Punct { ch, joint: if is_final { joint } else { true }, span }) })); }; diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index bcd078a89677e..88540e13ef243 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -52,7 +52,7 @@ pub(crate) fn parse_token_trees<'a>( let cursor = Cursor::new(src); let string_reader = StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span }; - tokentrees::TokenTreesReader::parse_token_trees(string_reader) + tokentrees::TokenTreesReader::parse_all_token_trees(string_reader) } struct StringReader<'a> { diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index 364753154db05..b2701817d489b 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -27,7 +27,7 @@ pub(super) struct TokenTreesReader<'a> { } impl<'a> TokenTreesReader<'a> { - pub(super) fn parse_token_trees( + pub(super) fn parse_all_token_trees( string_reader: StringReader<'a>, ) -> (PResult<'a, TokenStream>, Vec) { let mut tt_reader 
= TokenTreesReader { @@ -40,36 +40,51 @@ impl<'a> TokenTreesReader<'a> { last_delim_empty_block_spans: FxHashMap::default(), matching_block_spans: Vec::new(), }; - let res = tt_reader.parse_all_token_trees(); + let res = tt_reader.parse_token_trees(/* is_delimited */ false); (res, tt_reader.unmatched_braces) } - // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`. - fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { + // Parse a stream of tokens into a list of `TokenTree`s. + fn parse_token_trees(&mut self, is_delimited: bool) -> PResult<'a, TokenStream> { self.token = self.string_reader.next_token().0; - let mut buf = TokenStreamBuilder::default(); + let mut buf = Vec::new(); loop { match self.token.kind { token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)), - token::CloseDelim(delim) => return Err(self.close_delim_err(delim)), - token::Eof => return Ok(buf.into_token_stream()), - _ => buf.push(self.parse_token_tree_non_delim_non_eof()), - } - } - } - - // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`. - fn parse_token_trees_until_close_delim(&mut self) -> TokenStream { - let mut buf = TokenStreamBuilder::default(); - loop { - match self.token.kind { - token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)), - token::CloseDelim(..) => return buf.into_token_stream(), + token::CloseDelim(delim) => { + return if is_delimited { + Ok(TokenStream::new(buf)) + } else { + Err(self.close_delim_err(delim)) + }; + } token::Eof => { - self.eof_err().emit(); - return buf.into_token_stream(); + if is_delimited { + self.eof_err().emit(); + } + return Ok(TokenStream::new(buf)); + } + _ => { + // Get the next normal token. This might require getting multiple adjacent + // single-char tokens and joining them together. 
+ let (this_spacing, next_tok) = loop { + let (next_tok, is_next_tok_preceded_by_whitespace) = + self.string_reader.next_token(); + if !is_next_tok_preceded_by_whitespace { + if let Some(glued) = self.token.glue(&next_tok) { + self.token = glued; + } else { + let this_spacing = + if next_tok.is_op() { Spacing::Joint } else { Spacing::Alone }; + break (this_spacing, next_tok); + } + } else { + break (Spacing::Alone, next_tok); + } + }; + let this_tok = std::mem::replace(&mut self.token, next_tok); + buf.push(TokenTree::Token(this_tok, this_spacing)); } - _ => buf.push(self.parse_token_tree_non_delim_non_eof()), } } } @@ -113,14 +128,12 @@ impl<'a> TokenTreesReader<'a> { // The span for beginning of the delimited section let pre_span = self.token.span; - // Move past the open delimiter. self.open_braces.push((open_delim, self.token.span)); - self.token = self.string_reader.next_token().0; // Parse the token trees within the delimiters. // We stop at any delimiter so we can try to recover if the user // uses an incorrect delimiter. - let tts = self.parse_token_trees_until_close_delim(); + let tts = self.parse_token_trees(/* is_delimited */ true).unwrap(); // Expand to cover the entire delimited token tree let delim_span = DelimSpan::from_pair(pre_span, self.token.span); @@ -242,43 +255,4 @@ impl<'a> TokenTreesReader<'a> { err.span_label(self.token.span, "unexpected closing delimiter"); err } - - #[inline] - fn parse_token_tree_non_delim_non_eof(&mut self) -> TokenTree { - // `this_spacing` for the returned token refers to whether the token is - // immediately followed by another op token. It is determined by the - // next token: its kind and its `preceded_by_whitespace` status. 
- let (next_tok, is_next_tok_preceded_by_whitespace) = self.string_reader.next_token(); - let this_spacing = if is_next_tok_preceded_by_whitespace || !next_tok.is_op() { - Spacing::Alone - } else { - Spacing::Joint - }; - let this_tok = std::mem::replace(&mut self.token, next_tok); - TokenTree::Token(this_tok, this_spacing) - } -} - -#[derive(Default)] -struct TokenStreamBuilder { - buf: Vec<TokenTree>, -} - -impl TokenStreamBuilder { - #[inline(always)] - fn push(&mut self, tree: TokenTree) { - if let Some(TokenTree::Token(prev_token, Spacing::Joint)) = self.buf.last() - && let TokenTree::Token(token, joint) = &tree - && let Some(glued) = prev_token.glue(token) - { - self.buf.pop(); - self.buf.push(TokenTree::Token(glued, *joint)); - } else { - self.buf.push(tree) - } - } - - fn into_token_stream(self) -> TokenStream { - TokenStream::new(self.buf) - } } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 2aebaf7c3af2a..b934e087608a5 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -302,7 +302,10 @@ impl TokenCursor { fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) { // Searches for the occurrences of `"#*` and returns the minimum number of `#`s - // required to wrap the text. + // required to wrap the text. E.g. + // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0) + // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1) + // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3) let mut num_of_hashes = 0; let mut count = 0; for ch in data.as_str().chars() { @@ -314,6 +317,7 @@ impl TokenCursor { num_of_hashes = cmp::max(num_of_hashes, count); } + // `/// foo` becomes `doc = r"foo"`. let delim_span = DelimSpan::from_single(span); let body = TokenTree::Delimited( delim_span,