Skip to content

Commit cb56a44

Browse files
committed
Auto merge of #79338 - Aaron1011:fix/token-reparse-cache, r=petrochenkov
Cache pretty-print/retokenize result to avoid compile time blowup

Fixes #79242

If a `macro_rules!` recursively builds up a nested nonterminal (passing it to a proc-macro at each step), we will end up repeatedly pretty-printing/retokenizing the same nonterminals. Unfortunately, the 'probable equality' check we do has a non-trivial cost, which leads to a blowup in compilation time.

As a workaround, we cache the result of the 'probable equality' check, which eliminates the compilation time blowup for the linked issue.

This commit only touches a single file (other than adding tests), so it should be easy to backport.

The proper solution is to remove the pretty-print/retokenize hack entirely. However, this will almost certainly break a large number of crates that were relying on hygiene bugs created by using the reparsed `TokenStream`. As a result, we will definitely not want to backport such a change.
2 parents 72da5a9 + 6e466ef commit cb56a44

File tree

3 files changed

+85
-3
lines changed

3 files changed

+85
-3
lines changed

compiler/rustc_parse/src/lib.rs

+35-3
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@ use rustc_ast as ast;
99
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
1010
use rustc_ast::tokenstream::{self, LazyTokenStream, TokenStream, TokenTree};
1111
use rustc_ast_pretty::pprust;
12+
use rustc_data_structures::fx::FxHashSet;
1213
use rustc_data_structures::sync::Lrc;
1314
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
1415
use rustc_session::parse::ParseSess;
1516
use rustc_span::{symbol::kw, FileName, SourceFile, Span, DUMMY_SP};
1617

1718
use smallvec::SmallVec;
19+
use std::cell::RefCell;
1820
use std::mem;
1921
use std::path::Path;
2022
use std::str;
@@ -281,14 +283,33 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
281283
}
282284
};
283285

286+
// Caches the stringification of 'good' `TokenStreams` which passed
287+
// `tokenstream_probably_equal_for_proc_macro`. This allows us to avoid
288+
// repeatedly stringifying and comparing the same `TokenStream` for deeply
289+
// nested nonterminals.
290+
//
291+
// We cache by the stringification instead of the `TokenStream` to avoid
292+
// needing to implement `Hash` for `TokenStream`. Note that it's possible to
293+
// have two distinct `TokenStream`s that stringify to the same result
294+
// (e.g. if they differ only in hygiene information). However, any
295+
// information lost during the stringification process is also intentionally
296+
// ignored by `tokenstream_probably_equal_for_proc_macro`, so it's fine
297+
// that a single cache entry may 'map' to multiple distinct `TokenStream`s.
298+
//
299+
// This is a temporary hack to prevent compilation blowup on certain inputs.
300+
// The entire pretty-print/retokenize process will be removed soon.
301+
thread_local! {
302+
static GOOD_TOKEN_CACHE: RefCell<FxHashSet<String>> = Default::default();
303+
}
304+
284305
// FIXME(#43081): Avoid this pretty-print + reparse hack
285306
// Pretty-print the AST struct without inserting any parentheses
286307
// beyond those explicitly written by the user (e.g. `ExprKind::Paren`).
287308
// The resulting stream may have incorrect precedence, but it's only
288309
// ever used for a comparison against the capture tokenstream.
289310
let source = pprust::nonterminal_to_string_no_extra_parens(nt);
290311
let filename = FileName::macro_expansion_source_code(&source);
291-
let reparsed_tokens = parse_stream_from_source_str(filename, source, sess, Some(span));
312+
let reparsed_tokens = parse_stream_from_source_str(filename, source.clone(), sess, Some(span));
292313

293314
// During early phases of the compiler the AST could get modified
294315
// directly (e.g., attributes added or removed) and the internal cache
@@ -314,8 +335,13 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
314335
// modifications, including adding/removing typically non-semantic
315336
// tokens such as extra braces and commas, don't happen.
316337
if let Some(tokens) = tokens {
338+
if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(&source)) {
339+
return tokens;
340+
}
341+
317342
// Compare with a non-relaxed delim match to start.
318343
if tokenstream_probably_equal_for_proc_macro(&tokens, &reparsed_tokens, sess, false) {
344+
GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.clone()));
319345
return tokens;
320346
}
321347

@@ -324,6 +350,11 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
324350
// token stream to match up with inserted parentheses in the reparsed stream.
325351
let source_with_parens = pprust::nonterminal_to_string(nt);
326352
let filename_with_parens = FileName::macro_expansion_source_code(&source_with_parens);
353+
354+
if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(&source_with_parens)) {
355+
return tokens;
356+
}
357+
327358
let reparsed_tokens_with_parens = parse_stream_from_source_str(
328359
filename_with_parens,
329360
source_with_parens,
@@ -339,6 +370,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
339370
sess,
340371
true,
341372
) {
373+
GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.clone()));
342374
return tokens;
343375
}
344376

@@ -418,9 +450,9 @@ pub fn tokenstream_probably_equal_for_proc_macro(
418450
// to iterate breaking tokens multiple times. For example:
419451
// '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
420452
let mut token_trees: SmallVec<[_; 2]>;
421-
if let TokenTree::Token(token) = &tree {
453+
if let TokenTree::Token(token) = tree {
422454
let mut out = SmallVec::<[_; 2]>::new();
423-
out.push(token.clone());
455+
out.push(token);
424456
// Iterate to fixpoint:
425457
// * We start off with 'out' containing our initial token, and `temp` empty
426458
// * If we are able to break any tokens in `out`, then `out` will have
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// force-host
2+
// no-prefer-dynamic
3+
4+
#![crate_type = "proc-macro"]
5+
6+
extern crate proc_macro;
7+
8+
use proc_macro::TokenStream;
9+
10+
#[proc_macro]
11+
pub fn dummy(input: TokenStream) -> TokenStream {
12+
// Iterate to force internal conversion of nonterminals
13+
// to `proc_macro` structs
14+
for _ in input {}
15+
TokenStream::new()
16+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// check-pass
2+
// aux-build:issue-79242.rs
3+
4+
// Regression test for issue #79242
5+
// Tests that compilation time doesn't blow up for a proc-macro
6+
// invocation with deeply nested nonterminals
7+
8+
#![allow(unused)]
9+
10+
extern crate issue_79242;
11+
12+
macro_rules! declare_nats {
13+
($prev:ty) => {};
14+
($prev:ty, $n:literal$(, $tail:literal)*) => {
15+
16+
issue_79242::dummy! {
17+
$prev
18+
}
19+
20+
declare_nats!(Option<$prev>$(, $tail)*);
21+
};
22+
(0, $($n:literal),+) => {
23+
pub struct N0;
24+
declare_nats!(N0, $($n),+);
25+
};
26+
}
27+
28+
declare_nats! {
29+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
30+
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
31+
}
32+
33+
34+
fn main() {}

0 commit comments

Comments
 (0)