Skip to content

Commit 815dc9c

Browse files
authored
Rollup merge of #107544 - nnethercote:improve-TokenCursor, r=petrochenkov
Improve `TokenCursor`. Some small improvements, for things that were bugging me. Best reviewed one commit at a time. r? ``@petrochenkov``
2 parents d9db357 + a86fc72 commit 815dc9c

File tree

6 files changed

+98
-91
lines changed

6 files changed

+98
-91
lines changed

compiler/rustc_ast/src/tokenstream.rs

+27-15
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ use std::{fmt, iter};
4141
/// Nothing special happens to misnamed or misplaced `SubstNt`s.
4242
#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
4343
pub enum TokenTree {
44-
/// A single token.
44+
/// A single token. Should never be `OpenDelim` or `CloseDelim`, because
45+
/// delimiters are implicitly represented by `Delimited`.
4546
Token(Token, Spacing),
4647
/// A delimited sequence of token trees.
4748
Delimited(DelimSpan, Delimiter, TokenStream),
@@ -388,12 +389,12 @@ impl TokenStream {
388389
self.0.len()
389390
}
390391

391-
pub fn trees(&self) -> CursorRef<'_> {
392-
CursorRef::new(self)
392+
pub fn trees(&self) -> RefTokenTreeCursor<'_> {
393+
RefTokenTreeCursor::new(self)
393394
}
394395

395-
pub fn into_trees(self) -> Cursor {
396-
Cursor::new(self)
396+
pub fn into_trees(self) -> TokenTreeCursor {
397+
TokenTreeCursor::new(self)
397398
}
398399

399400
/// Compares two `TokenStream`s, checking equality without regarding span information.
@@ -551,24 +552,25 @@ impl TokenStream {
551552
}
552553
}
553554

554-
/// By-reference iterator over a [`TokenStream`].
555+
/// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
556+
/// items.
555557
#[derive(Clone)]
556-
pub struct CursorRef<'t> {
558+
pub struct RefTokenTreeCursor<'t> {
557559
stream: &'t TokenStream,
558560
index: usize,
559561
}
560562

561-
impl<'t> CursorRef<'t> {
563+
impl<'t> RefTokenTreeCursor<'t> {
562564
fn new(stream: &'t TokenStream) -> Self {
563-
CursorRef { stream, index: 0 }
565+
RefTokenTreeCursor { stream, index: 0 }
564566
}
565567

566568
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
567569
self.stream.0.get(self.index + n)
568570
}
569571
}
570572

571-
impl<'t> Iterator for CursorRef<'t> {
573+
impl<'t> Iterator for RefTokenTreeCursor<'t> {
572574
type Item = &'t TokenTree;
573575

574576
fn next(&mut self) -> Option<&'t TokenTree> {
@@ -579,15 +581,16 @@ impl<'t> Iterator for CursorRef<'t> {
579581
}
580582
}
581583

582-
/// Owning by-value iterator over a [`TokenStream`].
584+
/// Owning by-value iterator over a [`TokenStream`], that produces `TokenTree`
585+
/// items.
583586
// FIXME: Many uses of this can be replaced with by-reference iterator to avoid clones.
584587
#[derive(Clone)]
585-
pub struct Cursor {
588+
pub struct TokenTreeCursor {
586589
pub stream: TokenStream,
587590
index: usize,
588591
}
589592

590-
impl Iterator for Cursor {
593+
impl Iterator for TokenTreeCursor {
591594
type Item = TokenTree;
592595

593596
fn next(&mut self) -> Option<TokenTree> {
@@ -598,9 +601,9 @@ impl Iterator for Cursor {
598601
}
599602
}
600603

601-
impl Cursor {
604+
impl TokenTreeCursor {
602605
fn new(stream: TokenStream) -> Self {
603-
Cursor { stream, index: 0 }
606+
TokenTreeCursor { stream, index: 0 }
604607
}
605608

606609
#[inline]
@@ -614,6 +617,15 @@ impl Cursor {
614617
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
615618
self.stream.0.get(self.index + n)
616619
}
620+
621+
// Replace the previously obtained token tree with `tts`, and rewind to
622+
// just before the first of the replacement token trees.
623+
pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
624+
assert!(self.index > 0);
625+
self.index -= 1;
626+
let stream = Lrc::make_mut(&mut self.stream.0);
627+
stream.splice(self.index..self.index + 1, tts);
628+
}
617629
}
618630

619631
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]

compiler/rustc_expand/src/mbe/metavar_expr.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use rustc_ast::token::{self, Delimiter};
2-
use rustc_ast::tokenstream::{CursorRef, TokenStream, TokenTree};
2+
use rustc_ast::tokenstream::{RefTokenTreeCursor, TokenStream, TokenTree};
33
use rustc_ast::{LitIntType, LitKind};
44
use rustc_ast_pretty::pprust;
55
use rustc_errors::{Applicability, PResult};
@@ -72,7 +72,7 @@ impl MetaVarExpr {
7272

7373
// Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}`
7474
fn check_trailing_token<'sess>(
75-
iter: &mut CursorRef<'_>,
75+
iter: &mut RefTokenTreeCursor<'_>,
7676
sess: &'sess ParseSess,
7777
) -> PResult<'sess, ()> {
7878
if let Some(tt) = iter.next() {
@@ -88,7 +88,7 @@ fn check_trailing_token<'sess>(
8888

8989
/// Parse a meta-variable `count` expression: `count(ident[, depth])`
9090
fn parse_count<'sess>(
91-
iter: &mut CursorRef<'_>,
91+
iter: &mut RefTokenTreeCursor<'_>,
9292
sess: &'sess ParseSess,
9393
span: Span,
9494
) -> PResult<'sess, MetaVarExpr> {
@@ -99,7 +99,7 @@ fn parse_count<'sess>(
9999

100100
/// Parses the depth used by index(depth) and length(depth).
101101
fn parse_depth<'sess>(
102-
iter: &mut CursorRef<'_>,
102+
iter: &mut RefTokenTreeCursor<'_>,
103103
sess: &'sess ParseSess,
104104
span: Span,
105105
) -> PResult<'sess, usize> {
@@ -126,7 +126,7 @@ fn parse_depth<'sess>(
126126

127127
/// Parses a generic ident
128128
fn parse_ident<'sess>(
129-
iter: &mut CursorRef<'_>,
129+
iter: &mut RefTokenTreeCursor<'_>,
130130
sess: &'sess ParseSess,
131131
span: Span,
132132
) -> PResult<'sess, Ident> {
@@ -152,7 +152,7 @@ fn parse_ident<'sess>(
152152

153153
/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the
154154
/// iterator is not modified and the result is `false`.
155-
fn try_eat_comma(iter: &mut CursorRef<'_>) -> bool {
155+
fn try_eat_comma(iter: &mut RefTokenTreeCursor<'_>) -> bool {
156156
if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. }, _)) = iter.look_ahead(0) {
157157
let _ = iter.next();
158158
return true;

compiler/rustc_parse/src/parser/attr_wrapper.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,6 @@ mod size_asserts {
469469
use rustc_data_structures::static_assert_size;
470470
// tidy-alphabetical-start
471471
static_assert_size!(AttrWrapper, 16);
472-
static_assert_size!(LazyAttrTokenStreamImpl, 144);
472+
static_assert_size!(LazyAttrTokenStreamImpl, 120);
473473
// tidy-alphabetical-end
474474
}

compiler/rustc_parse/src/parser/expr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2141,7 +2141,7 @@ impl<'a> Parser<'a> {
21412141
}
21422142

21432143
if self.token.kind == TokenKind::Semi
2144-
&& matches!(self.token_cursor.frame.delim_sp, Some((Delimiter::Parenthesis, _)))
2144+
&& matches!(self.token_cursor.stack.last(), Some((_, Delimiter::Parenthesis, _)))
21452145
&& self.may_recover()
21462146
{
21472147
// It is likely that the closure body is a block but where the

compiler/rustc_parse/src/parser/mod.rs

+59-64
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,8 @@ pub use path::PathStyle;
1919

2020
use rustc_ast::ptr::P;
2121
use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind};
22-
use rustc_ast::tokenstream::AttributesData;
23-
use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
24-
use rustc_ast::tokenstream::{TokenStream, TokenTree};
22+
use rustc_ast::tokenstream::{AttributesData, DelimSpan, Spacing};
23+
use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
2524
use rustc_ast::util::case::Case;
2625
use rustc_ast::AttrId;
2726
use rustc_ast::DUMMY_NODE_ID;
@@ -168,7 +167,7 @@ pub struct Parser<'a> {
168167
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
169168
// it doesn't unintentionally get bigger.
170169
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
171-
rustc_data_structures::static_assert_size!(Parser<'_>, 336);
170+
rustc_data_structures::static_assert_size!(Parser<'_>, 312);
172171

173172
/// Stores span information about a closure.
174173
#[derive(Clone)]
@@ -221,18 +220,27 @@ impl<'a> Drop for Parser<'a> {
221220
}
222221
}
223222

223+
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
224+
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
225+
/// use this type to emit them as a linear sequence. But a linear sequence is
226+
/// what the parser expects, for the most part.
224227
#[derive(Clone)]
225228
struct TokenCursor {
226-
// The current (innermost) frame. `frame` and `stack` could be combined,
227-
// but it's faster to have them separately to access `frame` directly
228-
// rather than via something like `stack.last().unwrap()` or
229-
// `stack[stack.len() - 1]`.
230-
frame: TokenCursorFrame,
231-
// Additional frames that enclose `frame`.
232-
stack: Vec<TokenCursorFrame>,
229+
// Cursor for the current (innermost) token stream. The delimiters for this
230+
// token stream are found in `self.stack.last()`; when that is `None` then
231+
// we are in the outermost token stream which never has delimiters.
232+
tree_cursor: TokenTreeCursor,
233+
234+
// Token streams surrounding the current one. The delimiters for stack[n]'s
235+
// tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
236+
// because it's the outermost token stream which never has delimiters.
237+
stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
238+
233239
desugar_doc_comments: bool,
240+
234241
// Counts the number of calls to `{,inlined_}next`.
235242
num_next_calls: usize,
243+
236244
// During parsing, we may sometimes need to 'unglue' a
237245
// glued token into two component tokens
238246
// (e.g. '>>' into '>' and '>'), so that the parser
@@ -257,18 +265,6 @@ struct TokenCursor {
257265
break_last_token: bool,
258266
}
259267

260-
#[derive(Clone)]
261-
struct TokenCursorFrame {
262-
delim_sp: Option<(Delimiter, DelimSpan)>,
263-
tree_cursor: tokenstream::Cursor,
264-
}
265-
266-
impl TokenCursorFrame {
267-
fn new(delim_sp: Option<(Delimiter, DelimSpan)>, tts: TokenStream) -> Self {
268-
TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() }
269-
}
270-
}
271-
272268
impl TokenCursor {
273269
fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
274270
self.inlined_next(desugar_doc_comments)
@@ -281,38 +277,47 @@ impl TokenCursor {
281277
// FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
282278
// need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
283279
// removed.
284-
if let Some(tree) = self.frame.tree_cursor.next_ref() {
280+
if let Some(tree) = self.tree_cursor.next_ref() {
285281
match tree {
286282
&TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
287283
(true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
288-
return self.desugar(attr_style, data, span);
284+
let desugared = self.desugar(attr_style, data, span);
285+
self.tree_cursor.replace_prev_and_rewind(desugared);
286+
// Continue to get the first token of the desugared doc comment.
287+
}
288+
_ => {
289+
debug_assert!(!matches!(
290+
token.kind,
291+
token::OpenDelim(_) | token::CloseDelim(_)
292+
));
293+
return (token.clone(), spacing);
289294
}
290-
_ => return (token.clone(), spacing),
291295
},
292296
&TokenTree::Delimited(sp, delim, ref tts) => {
293-
// Set `open_delim` to true here because we deal with it immediately.
294-
let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone());
295-
self.stack.push(mem::replace(&mut self.frame, frame));
297+
let trees = tts.clone().into_trees();
298+
self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
296299
if delim != Delimiter::Invisible {
297300
return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
298301
}
299302
// No open delimiter to return; continue on to the next iteration.
300303
}
301304
};
302-
} else if let Some(frame) = self.stack.pop() {
303-
if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible {
304-
self.frame = frame;
305+
} else if let Some((tree_cursor, delim, span)) = self.stack.pop() {
306+
// We have exhausted this token stream. Move back to its parent token stream.
307+
self.tree_cursor = tree_cursor;
308+
if delim != Delimiter::Invisible {
305309
return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
306310
}
307-
self.frame = frame;
308311
// No close delimiter to return; continue on to the next iteration.
309312
} else {
313+
// We have exhausted the outermost token stream.
310314
return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
311315
}
312316
}
313317
}
314318

315-
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
319+
// Desugar a doc comment into something like `#[doc = r"foo"]`.
320+
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
316321
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
317322
// required to wrap the text. E.g.
318323
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
@@ -346,27 +351,15 @@ impl TokenCursor {
346351
.collect::<TokenStream>(),
347352
);
348353

349-
self.stack.push(mem::replace(
350-
&mut self.frame,
351-
TokenCursorFrame::new(
352-
None,
353-
if attr_style == AttrStyle::Inner {
354-
[
355-
TokenTree::token_alone(token::Pound, span),
356-
TokenTree::token_alone(token::Not, span),
357-
body,
358-
]
359-
.into_iter()
360-
.collect::<TokenStream>()
361-
} else {
362-
[TokenTree::token_alone(token::Pound, span), body]
363-
.into_iter()
364-
.collect::<TokenStream>()
365-
},
366-
),
367-
));
368-
369-
self.next(/* desugar_doc_comments */ false)
354+
if attr_style == AttrStyle::Inner {
355+
vec![
356+
TokenTree::token_alone(token::Pound, span),
357+
TokenTree::token_alone(token::Not, span),
358+
body,
359+
]
360+
} else {
361+
vec![TokenTree::token_alone(token::Pound, span), body]
362+
}
370363
}
371364
}
372365

@@ -475,7 +468,7 @@ impl<'a> Parser<'a> {
475468
restrictions: Restrictions::empty(),
476469
expected_tokens: Vec::new(),
477470
token_cursor: TokenCursor {
478-
frame: TokenCursorFrame::new(None, tokens),
471+
tree_cursor: tokens.into_trees(),
479472
stack: Vec::new(),
480473
num_next_calls: 0,
481474
desugar_doc_comments,
@@ -1142,14 +1135,16 @@ impl<'a> Parser<'a> {
11421135
return looker(&self.token);
11431136
}
11441137

1145-
let frame = &self.token_cursor.frame;
1146-
if let Some((delim, span)) = frame.delim_sp && delim != Delimiter::Invisible {
1138+
let tree_cursor = &self.token_cursor.tree_cursor;
1139+
if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
1140+
&& delim != Delimiter::Invisible
1141+
{
11471142
let all_normal = (0..dist).all(|i| {
1148-
let token = frame.tree_cursor.look_ahead(i);
1143+
let token = tree_cursor.look_ahead(i);
11491144
!matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
11501145
});
11511146
if all_normal {
1152-
return match frame.tree_cursor.look_ahead(dist - 1) {
1147+
return match tree_cursor.look_ahead(dist - 1) {
11531148
Some(tree) => match tree {
11541149
TokenTree::Token(token, _) => looker(token),
11551150
TokenTree::Delimited(dspan, delim, _) => {
@@ -1310,10 +1305,10 @@ impl<'a> Parser<'a> {
13101305
pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
13111306
match self.token.kind {
13121307
token::OpenDelim(..) => {
1313-
// Grab the tokens from this frame.
1314-
let frame = &self.token_cursor.frame;
1315-
let stream = frame.tree_cursor.stream.clone();
1316-
let (delim, span) = frame.delim_sp.unwrap();
1308+
// Grab the tokens within the delimiters.
1309+
let tree_cursor = &self.token_cursor.tree_cursor;
1310+
let stream = tree_cursor.stream.clone();
1311+
let (_, delim, span) = *self.token_cursor.stack.last().unwrap();
13171312

13181313
// Advance the token cursor through the entire delimited
13191314
// sequence. After getting the `OpenDelim` we are *within* the

0 commit comments

Comments
 (0)