Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit 8b22b49

Browse files
committed
Auto merge of rust-lang#129346 - nnethercote:fix-double-handling-in-collect_tokens, r=<try>
Fix double handling in `collect_tokens`. Double handling of AST nodes can occur in `collect_tokens`: an inner call to `collect_tokens` produces an AST node, and then an outer call to `collect_tokens` produces the same AST node. This can happen in a few places, e.g. expression statements, where the statement delegates `HasTokens` and `HasAttrs` to the expression. It will also happen more often after rust-lang#124141. This PR fixes some double-handling cases that cause problems, including rust-lang#129166. r? `@petrochenkov`
2 parents 5aea140 + 7d65809 commit 8b22b49

File tree

7 files changed

+163
-164
lines changed

7 files changed

+163
-164
lines changed

compiler/rustc_parse/src/parser/attr_wrapper.rs

Lines changed: 53 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -134,30 +134,17 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
134134
node_replacements.array_windows()
135135
{
136136
assert!(
137-
node_range.0.end <= next_node_range.0.start
138-
|| node_range.0.end >= next_node_range.0.end,
139-
"Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
137+
node_range.0.end <= next_node_range.0.start,
138+
"Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})",
140139
node_range,
141140
tokens,
142141
next_node_range,
143142
next_tokens,
144143
);
145144
}
146145

147-
// Process the replace ranges, starting from the highest start
148-
// position and working our way back. If have tokens like:
149-
//
150-
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
151-
//
152-
// Then we will generate replace ranges for both
153-
// the `#[cfg(FALSE)] field: bool` and the entire
154-
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
155-
//
156-
// By starting processing from the replace range with the greatest
157-
// start position, we ensure that any (outer) replace range which
158-
// encloses another (inner) replace range will fully overwrite the
159-
// inner range's replacement.
160-
for (node_range, target) in node_replacements.into_iter().rev() {
146+
// Process the replace ranges.
147+
for (node_range, target) in node_replacements.into_iter() {
161148
assert!(
162149
!node_range.0.is_empty(),
163150
"Cannot replace an empty node range: {:?}",
@@ -234,6 +221,8 @@ impl<'a> Parser<'a> {
234221
force_collect: ForceCollect,
235222
f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
236223
) -> PResult<'a, R> {
224+
let possible_capture_mode = self.capture_cfg;
225+
237226
// We must collect if anything could observe the collected tokens, i.e.
238227
// if any of the following conditions hold.
239228
// - We are force collecting tokens (because force collection requires
@@ -244,9 +233,9 @@ impl<'a> Parser<'a> {
244233
// - Our target supports custom inner attributes (custom
245234
// inner attribute invocation might require token capturing).
246235
|| R::SUPPORTS_CUSTOM_INNER_ATTRS
247-
// - We are in `capture_cfg` mode (which requires tokens if
236+
// - We are in "possible capture mode" (which requires tokens if
248237
// the parsed node has `#[cfg]` or `#[cfg_attr]` attributes).
249-
|| self.capture_cfg;
238+
|| possible_capture_mode;
250239
if !needs_collection {
251240
return Ok(f(self, attrs.attrs)?.0);
252241
}
@@ -267,7 +256,21 @@ impl<'a> Parser<'a> {
267256
res?
268257
};
269258

270-
// When we're not in `capture_cfg` mode, then skip collecting and
259+
// Ignore any attributes we've previously processed. This happens when
260+
// an inner call to `collect_tokens` returns an AST node and then an
261+
// outer call ends up with the same AST node without any additional
262+
// wrapping layer.
263+
let ret_attrs: AttrVec = ret
264+
.attrs()
265+
.iter()
266+
.cloned()
267+
.filter(|attr| {
268+
let is_unseen = self.capture_state.seen_attrs.insert(attr.id);
269+
is_unseen
270+
})
271+
.collect();
272+
273+
// When we're not in "definite capture mode", then skip collecting and
271274
// return early if either of the following conditions hold.
272275
// - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
273276
// - `Some(Some(_))`: Our target already has tokens set (e.g. we've
@@ -278,7 +281,10 @@ impl<'a> Parser<'a> {
278281
// Note that this check is independent of `force_collect`. There's no
279282
// need to collect tokens when we don't support tokens or already have
280283
// tokens.
281-
if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
284+
let definite_capture_mode = self.capture_cfg
285+
&& matches!(self.capture_state.capturing, Capturing::Yes)
286+
&& has_cfg_or_cfg_attr(&ret_attrs);
287+
if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) {
282288
return Ok(ret);
283289
}
284290

@@ -297,12 +303,12 @@ impl<'a> Parser<'a> {
297303
// outer and inner attributes. So this check is more precise than
298304
// the earlier `needs_tokens` check, and we don't need to
299305
// check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
300-
|| needs_tokens(ret.attrs())
301-
// - We are in `capture_cfg` mode and there are `#[cfg]` or
302-
// `#[cfg_attr]` attributes. (During normal non-`capture_cfg`
303-
// parsing, we don't need any special capturing for those
304-
// attributes, because they're builtin.)
305-
|| (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()));
306+
|| needs_tokens(&ret_attrs)
307+
// - We are in "definite capture mode", which requires that there
308+
// are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
309+
// non-`capture_cfg` parsing, we don't need any special capturing
310+
// for those attributes, because they're builtin.)
311+
|| definite_capture_mode;
306312
if !needs_collection {
307313
return Ok(ret);
308314
}
@@ -336,7 +342,7 @@ impl<'a> Parser<'a> {
336342
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
337343
// which means the relevant tokens will be removed. (More details below.)
338344
let mut inner_attr_parser_replacements = Vec::new();
339-
for attr in ret.attrs() {
345+
for attr in ret_attrs.iter() {
340346
if attr.style == ast::AttrStyle::Inner {
341347
if let Some(inner_attr_parser_range) =
342348
self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
@@ -359,11 +365,10 @@ impl<'a> Parser<'a> {
359365
// from `ParserRange` form to `NodeRange` form. We will perform the actual
360366
// replacement only when we convert the `LazyAttrTokenStream` to an
361367
// `AttrTokenStream`.
362-
self.capture_state.parser_replacements
363-
[parser_replacements_start..parser_replacements_end]
364-
.iter()
365-
.cloned()
366-
.chain(inner_attr_parser_replacements.iter().cloned())
368+
self.capture_state
369+
.parser_replacements
370+
.drain(parser_replacements_start..parser_replacements_end)
371+
.chain(inner_attr_parser_replacements.into_iter())
367372
.map(|(parser_range, data)| {
368373
(NodeRange::new(parser_range, collect_pos.start_pos), data)
369374
})
@@ -399,20 +404,18 @@ impl<'a> Parser<'a> {
399404
break_last_token: self.break_last_token,
400405
node_replacements,
401406
});
407+
let mut tokens_used = false;
402408

403409
// If we support tokens and don't already have them, store the newly captured tokens.
404410
if let Some(target_tokens @ None) = ret.tokens_mut() {
411+
tokens_used = true;
405412
*target_tokens = Some(tokens.clone());
406413
}
407414

408-
// If `capture_cfg` is set and we're inside a recursive call to
409-
// `collect_tokens`, then we need to register a replace range if we
410-
// have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
411-
// cfg-expansion on the captured token stream.
412-
if self.capture_cfg
413-
&& matches!(self.capture_state.capturing, Capturing::Yes)
414-
&& has_cfg_or_cfg_attr(ret.attrs())
415-
{
415+
// If in "definite capture mode" we need to register a replace range
416+
// for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
417+
// eager cfg-expansion on the captured token stream.
418+
if definite_capture_mode {
416419
assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
417420

418421
// What is the status here when parsing the example code at the top of this method?
@@ -429,7 +432,8 @@ impl<'a> Parser<'a> {
429432
// cfg-expand this AST node.
430433
let start_pos =
431434
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
432-
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
435+
let target = AttrsTarget { attrs: ret_attrs, tokens };
436+
tokens_used = true;
433437
self.capture_state
434438
.parser_replacements
435439
.push((ParserRange(start_pos..end_pos), Some(target)));
@@ -438,7 +442,9 @@ impl<'a> Parser<'a> {
438442
// the outermost call to this method.
439443
self.capture_state.parser_replacements.clear();
440444
self.capture_state.inner_attr_parser_ranges.clear();
445+
self.capture_state.seen_attrs.clear();
441446
}
447+
assert!(tokens_used); // check we didn't create `tokens` unnecessarily
442448
Ok(ret)
443449
}
444450
}
@@ -510,9 +516,11 @@ fn make_attr_token_stream(
510516
}
511517

512518
/// Tokens are needed if:
513-
/// - any non-single-segment attributes (other than doc comments) are present; or
514-
/// - any `cfg_attr` attributes are present;
515-
/// - any single-segment, non-builtin attributes are present.
519+
/// - any non-single-segment attributes (other than doc comments) are present,
520+
/// e.g. `rustfmt::skip`; or
521+
/// - any `cfg_attr` attributes are present; or
522+
/// - any single-segment, non-builtin attributes are present, e.g. `derive`,
523+
/// `test`, `global_allocator`.
516524
fn needs_tokens(attrs: &[ast::Attribute]) -> bool {
517525
attrs.iter().any(|attr| match attr.ident() {
518526
None => !attr.is_doc_comment(),

compiler/rustc_parse/src/parser/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use rustc_ast::{
3232
VisibilityKind, DUMMY_NODE_ID,
3333
};
3434
use rustc_ast_pretty::pprust;
35-
use rustc_data_structures::fx::FxHashMap;
35+
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
3636
use rustc_data_structures::sync::Lrc;
3737
use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult};
3838
use rustc_session::parse::ParseSess;
@@ -183,7 +183,7 @@ pub struct Parser<'a> {
183183
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
184184
// it doesn't unintentionally get bigger.
185185
#[cfg(target_pointer_width = "64")]
186-
rustc_data_structures::static_assert_size!(Parser<'_>, 256);
186+
rustc_data_structures::static_assert_size!(Parser<'_>, 288);
187187

188188
/// Stores span information about a closure.
189189
#[derive(Clone, Debug)]
@@ -260,6 +260,7 @@ struct CaptureState {
260260
capturing: Capturing,
261261
parser_replacements: Vec<ParserReplacement>,
262262
inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>,
263+
seen_attrs: FxHashSet<AttrId>,
263264
}
264265

265266
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
@@ -457,6 +458,7 @@ impl<'a> Parser<'a> {
457458
capturing: Capturing::No,
458459
parser_replacements: Vec::new(),
459460
inner_attr_parser_ranges: Default::default(),
461+
seen_attrs: Default::default(),
460462
},
461463
current_closure: None,
462464
recovery: Recovery::Allowed,

tests/crashes/129166.rs

Lines changed: 0 additions & 7 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// This was triggering an assertion failure in `NodeRange::new`.
2+
3+
#![feature(cfg_eval)]
4+
#![feature(stmt_expr_attributes)]
5+
6+
fn f() -> u32 {
7+
#[cfg_eval] #[cfg(not(FALSE))] 0
8+
//~^ ERROR removing an expression is not supported in this position
9+
}
10+
11+
fn main() {}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
error: removing an expression is not supported in this position
2+
--> $DIR/invalid-node-range-issue-129166.rs:5:17
3+
|
4+
LL | #[cfg_eval] #[cfg(not(FALSE))] 0
5+
| ^^^^^^^^^^^^^^^^^^
6+
7+
error: aborting due to 1 previous error
8+

tests/ui/proc-macro/macro-rules-derive-cfg.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,15 @@ extern crate test_macros;
1414
macro_rules! produce_it {
1515
($expr:expr) => {
1616
#[derive(Print)]
17-
struct Foo {
18-
val: [bool; {
19-
let a = #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr;
20-
0
21-
}]
22-
}
17+
struct Foo(
18+
[bool; #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr]
19+
);
2320
}
2421
}
2522

2623
produce_it!(#[cfg_attr(not(FALSE), rustc_dummy(second))] {
27-
#![cfg_attr(not(FALSE), allow(unused))]
24+
#![cfg_attr(not(FALSE), rustc_dummy(third))]
25+
#[cfg_attr(not(FALSE), rustc_dummy(fourth))]
2826
30
2927
});
3028

0 commit comments

Comments
 (0)