Skip to content

Commit 847b9f7

Browse files
committed
mbe: Refactor the logic for ${concat}
* Now accept numbers, chars * Suffixes are stripped (needs more testing) * Report specific locations of errors * TODO: handle idents the same for expanded tokens
1 parent 6f81ed3 commit 847b9f7

File tree

15 files changed

+524
-234
lines changed

15 files changed

+524
-234
lines changed

compiler/rustc_builtin_macros/src/concat.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ pub(crate) fn expand_concat(
2424
let mut guar = None;
2525
for e in es {
2626
match e.kind {
27+
// For consistent user experience, please keep this in sync with the handling of
28+
// literals in `rustc_expand::mbe::metavar_expr` `${concat()}`!
2729
ExprKind::Lit(token_lit) => match LitKind::from_token_lit(token_lit) {
2830
Ok(LitKind::Str(s, _) | LitKind::Float(s, _)) => {
2931
accumulator.push_str(s.as_str());

compiler/rustc_expand/messages.ftl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,21 @@ expand_module_multiple_candidates =
133133
expand_must_repeat_once =
134134
this must repeat at least once
135135

136+
expand_mve_concat_invalid =
137+
invalid item within a `{"${concat(...)}"}` expression
138+
.expr_ident = expanding this `concat(...)` expression
139+
.invalid_ident = this literal produced an invalid identifier
140+
.float_lit = float literals cannot be concatenated
141+
.c_str_lit = C string literals cannot be concatenated
142+
.b_str_lit = byte literals cannot be concatenated
143+
.raw_ident = raw identifiers cannot be concatenated
144+
.unsupported = unsupported input for `concat(...)`
145+
.valid_types = `concat` can join {$valid}
146+
.expected_metavar = expected an identifier; got `{$found}`
147+
.expected_metavar_dollar = todo
148+
.unexpected_group = todo
149+
.hi_label = todo
150+
136151
expand_mve_expected_ident =
137152
expected an identifier
138153
.not_ident = not a valid identifier

compiler/rustc_expand/src/errors.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,49 @@ pub(crate) use metavar_exprs::*;
497497
mod metavar_exprs {
498498
use super::*;
499499

500+
#[derive(Diagnostic)]
501+
#[diag(expand_mve_concat_invalid)]
502+
pub(crate) struct MveConcatInvalid {
503+
#[primary_span]
504+
pub span: Span,
505+
#[subdiagnostic]
506+
pub reason: MveConcatInvalidReason,
507+
#[help(expand_expr_ident)]
508+
pub ident_span: Span,
509+
pub valid: &'static str,
510+
}
511+
512+
// TODO: can these be labels rather than notes?
513+
#[derive(Subdiagnostic)]
514+
pub(crate) enum MveConcatInvalidReason {
515+
#[note(expand_invalid_ident)]
516+
InvalidIdent,
517+
#[note(expand_float_lit)]
518+
#[help(expand_valid_types)]
519+
FloatLit,
520+
#[note(expand_c_str_lit)]
521+
#[help(expand_valid_types)]
522+
CStrLit,
523+
#[note(expand_b_str_lit)]
524+
#[help(expand_valid_types)]
525+
ByteStrLit,
526+
#[note(expand_expected_metavar)]
527+
#[label(expand_expected_metavar_dollar)]
528+
ExpectedMetavarIdent {
529+
found: String,
530+
#[primary_span]
531+
dollar: Span,
532+
},
533+
#[note(expand_raw_ident)]
534+
RawIdentifier,
535+
#[note(expand_unsupported)]
536+
#[help(expand_valid_types)]
537+
UnsupportedInput,
538+
#[note(expand_unexpected_group)]
539+
UnexpectedGroup,
540+
InvalidLiteral,
541+
}
542+
500543
#[derive(Diagnostic)]
501544
#[diag(expand_mve_expected_ident)]
502545
pub(crate) struct MveExpectedIdent {

compiler/rustc_expand/src/mbe/metavar_expr.rs

Lines changed: 94 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
1-
use rustc_ast::token::{self, Delimiter, IdentIsRaw, Lit, Token, TokenKind};
1+
use rustc_ast::token::{self, Delimiter, IdentIsRaw, Token, TokenKind};
22
use rustc_ast::tokenstream::{TokenStream, TokenStreamIter, TokenTree};
3-
use rustc_ast::{LitIntType, LitKind};
3+
use rustc_ast::{self as ast, LitIntType, LitKind};
44
use rustc_ast_pretty::pprust;
5-
use rustc_errors::{Applicability, PResult};
5+
use rustc_errors::PResult;
6+
use rustc_lexer::is_id_continue;
67
use rustc_macros::{Decodable, Encodable};
8+
use rustc_session::errors::create_lit_error;
79
use rustc_session::parse::ParseSess;
810
use rustc_span::{Ident, Span, Symbol};
911

10-
use crate::errors::{self, MveExpectedIdentContext};
12+
use crate::errors::{self, MveConcatInvalidReason, MveExpectedIdentContext};
1113

1214
pub(crate) const RAW_IDENT_ERR: &str = "`${concat(..)}` currently does not support raw identifiers";
13-
pub(crate) const UNSUPPORTED_CONCAT_ELEM_ERR: &str = "expected identifier or string literal";
15+
pub(crate) const VALID_EXPR_CONCAT_TYPES: &str =
16+
"metavariables, identifiers, string literals, and integer literals";
1417

1518
/// List of the below list for diagnostics.
1619
const VALID_METAVAR_EXPR_NAMES: &str = "`count`, `ignore`, `index`, `len`, and `concat`";
@@ -165,7 +168,7 @@ fn iter_span(iter: &TokenStreamIter<'_>) -> Option<Span> {
165168
pub(crate) enum MetaVarExprConcatElem {
166169
/// Identifier WITHOUT a preceding dollar sign, which means that this identifier should be
167170
/// interpreted as a literal.
168-
Ident(Ident),
171+
Ident(String),
169172
/// For example, a number or a string.
170173
Literal(Symbol),
171174
/// Identifier WITH a preceding dollar sign, which means that this identifier should be
@@ -181,30 +184,92 @@ fn parse_concat<'psess>(
181184
expr_ident_span: Span,
182185
) -> PResult<'psess, MetaVarExpr> {
183186
let mut result = Vec::new();
187+
let dcx = psess.dcx();
184188
loop {
185-
let is_var = try_eat_dollar(iter);
186-
let token = parse_token(iter, psess, outer_span)?;
187-
let element = if is_var {
188-
MetaVarExprConcatElem::Var(parse_ident_from_token(psess, token)?)
189-
} else if let TokenKind::Literal(Lit { kind: token::LitKind::Str, symbol, suffix: None }) =
190-
token.kind
191-
{
192-
MetaVarExprConcatElem::Literal(symbol)
193-
} else {
194-
match parse_ident_from_token(psess, token) {
195-
Err(err) => {
196-
err.cancel();
197-
return Err(psess
198-
.dcx()
199-
.struct_span_err(token.span, UNSUPPORTED_CONCAT_ELEM_ERR));
189+
let dollar = try_eat_dollar(iter);
190+
let Some(tt) = iter.next() else {
191+
// May be hit only with the first iteration (peek is otherwise checked at the end).
192+
break;
193+
};
194+
195+
let make_err = |reason| {
196+
let err = errors::MveConcatInvalid {
197+
span: tt.span(),
198+
ident_span: expr_ident_span,
199+
reason,
200+
valid: VALID_EXPR_CONCAT_TYPES,
201+
};
202+
Err(dcx.create_err(err))
203+
};
204+
205+
let token = match tt {
206+
TokenTree::Token(token, _) => token,
207+
TokenTree::Delimited(..) => {
208+
return make_err(MveConcatInvalidReason::UnexpectedGroup);
209+
}
210+
};
211+
212+
let element = if let Some(dollar) = dollar {
213+
// Expecting a metavar
214+
let Some((ident, _)) = token.ident() else {
215+
return make_err(MveConcatInvalidReason::ExpectedMetavarIdent {
216+
found: pprust::token_to_string(token).into_owned(),
217+
dollar,
218+
});
219+
};
220+
221+
// Variables get passed untouched
222+
MetaVarExprConcatElem::Var(ident)
223+
} else if let TokenKind::Literal(lit) = token.kind {
224+
// Preprocess with `from_token_lit` to handle unescaping, float / int literal suffix
225+
// stripping.
226+
//
227+
// For consistent user experience, please keep this in sync with the handling of
228+
// literals in `rustc_builtin_macros::concat`!
229+
let s = match ast::LitKind::from_token_lit(lit.clone()) {
230+
Ok(ast::LitKind::Str(s, _)) => s.to_string(),
231+
Ok(ast::LitKind::Float(..)) => {
232+
return make_err(MveConcatInvalidReason::FloatLit);
233+
}
234+
Ok(ast::LitKind::Char(c)) => c.to_string(),
235+
Ok(ast::LitKind::Int(i, _)) => i.to_string(),
236+
Ok(ast::LitKind::Bool(b)) => b.to_string(),
237+
Ok(ast::LitKind::CStr(..)) => return make_err(MveConcatInvalidReason::CStrLit),
238+
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
239+
return make_err(MveConcatInvalidReason::ByteStrLit);
200240
}
201-
Ok(elem) => MetaVarExprConcatElem::Ident(elem),
241+
Ok(ast::LitKind::Err(_guarantee)) => {
242+
// REVIEW: a diagnostic was already emitted, should we just break?
243+
return make_err(MveConcatInvalidReason::InvalidLiteral);
244+
}
245+
Err(err) => return Err(create_lit_error(psess, err, lit, token.span)),
246+
};
247+
248+
if !s.chars().all(|ch| is_id_continue(ch)) {
249+
// Check that all characters are valid in the middle of an identifier. This doesn't
250+
// guarantee that the final identifier is valid (we still need to check it later),
251+
// but it allows us to catch errors with specific arguments before expansion time;
252+
// for example, string literal "foo.bar" gets flagged before the macro is invoked.
253+
return make_err(MveConcatInvalidReason::InvalidIdent);
254+
}
255+
256+
MetaVarExprConcatElem::Ident(s)
257+
} else if let Some((elem, is_raw)) = token.ident() {
258+
if is_raw == IdentIsRaw::Yes {
259+
return make_err(MveConcatInvalidReason::RawIdentifier);
202260
}
261+
MetaVarExprConcatElem::Ident(elem.as_str().to_string())
262+
} else {
263+
return make_err(MveConcatInvalidReason::UnsupportedInput);
203264
};
265+
204266
result.push(element);
267+
205268
if iter.peek().is_none() {
269+
// break before trying to eat the comma
206270
break;
207271
}
272+
208273
if !try_eat_comma(iter) {
209274
return Err(psess.dcx().struct_span_err(outer_span, "expected comma"));
210275
}
@@ -290,43 +355,6 @@ fn parse_ident<'psess>(
290355
Ok(elem)
291356
}
292357

293-
fn parse_ident_from_token<'psess>(
294-
psess: &'psess ParseSess,
295-
token: &Token,
296-
) -> PResult<'psess, Ident> {
297-
if let Some((elem, is_raw)) = token.ident() {
298-
if let IdentIsRaw::Yes = is_raw {
299-
return Err(psess.dcx().struct_span_err(elem.span, RAW_IDENT_ERR));
300-
}
301-
return Ok(elem);
302-
}
303-
let token_str = pprust::token_to_string(token);
304-
let mut err = psess
305-
.dcx()
306-
.struct_span_err(token.span, format!("expected identifier, found `{token_str}`"));
307-
err.span_suggestion(
308-
token.span,
309-
format!("try removing `{token_str}`"),
310-
"",
311-
Applicability::MaybeIncorrect,
312-
);
313-
Err(err)
314-
}
315-
316-
fn parse_token<'psess, 't>(
317-
iter: &mut TokenStreamIter<'t>,
318-
psess: &'psess ParseSess,
319-
fallback_span: Span,
320-
) -> PResult<'psess, &'t Token> {
321-
let Some(tt) = iter.next() else {
322-
return Err(psess.dcx().struct_span_err(fallback_span, UNSUPPORTED_CONCAT_ELEM_ERR));
323-
};
324-
let TokenTree::Token(token, _) = tt else {
325-
return Err(psess.dcx().struct_span_err(tt.span(), UNSUPPORTED_CONCAT_ELEM_ERR));
326-
};
327-
Ok(token)
328-
}
329-
330358
/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the
331359
/// iterator is not modified and the result is `false`.
332360
fn try_eat_comma(iter: &mut TokenStreamIter<'_>) -> bool {
@@ -337,14 +365,14 @@ fn try_eat_comma(iter: &mut TokenStreamIter<'_>) -> bool {
337365
false
338366
}
339367

340-
/// Tries to move the iterator forward returning `true` if there is a dollar sign. If not, then the
341-
/// iterator is not modified and the result is `false`.
342-
fn try_eat_dollar(iter: &mut TokenStreamIter<'_>) -> bool {
343-
if let Some(TokenTree::Token(Token { kind: token::Dollar, .. }, _)) = iter.peek() {
368+
/// Tries to move the iterator forward returning `Some(dollar_span)` if there is a dollar sign. If
369+
/// not, then the iterator is not modified and the result is `None`.
370+
fn try_eat_dollar(iter: &mut TokenStreamIter<'_>) -> Option<Span> {
371+
if let Some(TokenTree::Token(Token { kind: token::Dollar, span }, _)) = iter.peek() {
344372
let _ = iter.next();
345-
return true;
373+
return Some(*span);
346374
}
347-
false
375+
None
348376
}
349377

350378
/// Expects that the next item is a dollar sign.
@@ -353,7 +381,7 @@ fn eat_dollar<'psess>(
353381
psess: &'psess ParseSess,
354382
span: Span,
355383
) -> PResult<'psess, ()> {
356-
if try_eat_dollar(iter) {
384+
if try_eat_dollar(iter).is_some() {
357385
return Ok(());
358386
}
359387
Err(psess.dcx().struct_span_err(

compiler/rustc_expand/src/mbe/transcribe.rs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -545,9 +545,10 @@ fn metavar_expr_concat<'tx>(
545545
let dcx = tscx.psess.dcx();
546546
let mut concatenated = String::new();
547547
for element in elements.into_iter() {
548-
let symbol = match element {
549-
MetaVarExprConcatElem::Ident(elem) => elem.name,
550-
MetaVarExprConcatElem::Literal(elem) => *elem,
548+
let tmp_sym;
549+
let sym_str = match element {
550+
MetaVarExprConcatElem::Ident(elem) => elem.as_str(),
551+
MetaVarExprConcatElem::Literal(elem) => elem.as_str(),
551552
MetaVarExprConcatElem::Var(ident) => {
552553
match matched_from_ident(dcx, *ident, tscx.interp)? {
553554
NamedMatch::MatchedSeq(named_matches) => {
@@ -557,16 +558,20 @@ fn metavar_expr_concat<'tx>(
557558
match &named_matches[*curr_idx] {
558559
// FIXME(c410-f3r) Nested repetitions are unimplemented
559560
MatchedSeq(_) => unimplemented!(),
560-
MatchedSingle(pnr) => extract_symbol_from_pnr(dcx, pnr, ident.span)?,
561+
MatchedSingle(pnr) => {
562+
tmp_sym = extract_symbol_from_pnr(tscx, pnr, ident.span)?;
563+
tmp_sym.as_str()
564+
}
561565
}
562566
}
563567
NamedMatch::MatchedSingle(pnr) => {
564-
extract_symbol_from_pnr(dcx, pnr, ident.span)?
568+
tmp_sym = extract_symbol_from_pnr(tscx, pnr, ident.span)?;
569+
tmp_sym.as_str()
565570
}
566571
}
567572
}
568573
};
569-
concatenated.push_str(symbol.as_str());
574+
concatenated.push_str(sym_str);
570575
}
571576
let symbol = nfc_normalize(&concatenated);
572577
let concatenated_span = tscx.visited_dspan(dspan);
@@ -900,11 +905,13 @@ fn out_of_bounds_err<'a>(dcx: DiagCtxtHandle<'a>, max: usize, span: Span, ty: &s
900905
}
901906

902907
/// Extracts an metavariable symbol that can be an identifier, a token tree or a literal.
903-
fn extract_symbol_from_pnr<'a>(
904-
dcx: DiagCtxtHandle<'a>,
908+
// TODO: use the same logic as for metavar_expr
909+
fn extract_symbol_from_pnr<'tx>(
910+
tscx: &mut TranscrCtx<'tx, '_>,
905911
pnr: &ParseNtResult,
906912
span_err: Span,
907-
) -> PResult<'a, Symbol> {
913+
) -> PResult<'tx, Symbol> {
914+
let dcx = tscx.psess.dcx();
908915
match pnr {
909916
ParseNtResult::Ident(nt_ident, is_raw) => {
910917
if let IdentIsRaw::Yes = is_raw {

0 commit comments

Comments
 (0)