|
3 | 3 |
|
4 | 4 | use super::StringReader; |
5 | 5 | use errors::{Applicability, DiagnosticBuilder}; |
6 | | -use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION}; |
| 6 | +use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw}; |
| 7 | +use crate::parse::token; |
7 | 8 |
|
8 | 9 | #[rustfmt::skip] // for line breaks |
9 | 10 | const UNICODE_ARRAY: &[(char, &str, char)] = &[ |
@@ -297,53 +298,59 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[ |
297 | 298 | ('>', "Fullwidth Greater-Than Sign", '>'), |
298 | 299 | ]; |
299 | 300 |
|
300 | | -const ASCII_ARRAY: &[(char, &str)] = &[ |
301 | | - (' ', "Space"), |
302 | | - ('_', "Underscore"), |
303 | | - ('-', "Minus/Hyphen"), |
304 | | - (',', "Comma"), |
305 | | - (';', "Semicolon"), |
306 | | - (':', "Colon"), |
307 | | - ('!', "Exclamation Mark"), |
308 | | - ('?', "Question Mark"), |
309 | | - ('.', "Period"), |
310 | | - ('\'', "Single Quote"), |
311 | | - ('"', "Quotation Mark"), |
312 | | - ('(', "Left Parenthesis"), |
313 | | - (')', "Right Parenthesis"), |
314 | | - ('[', "Left Square Bracket"), |
315 | | - (']', "Right Square Bracket"), |
316 | | - ('{', "Left Curly Brace"), |
317 | | - ('}', "Right Curly Brace"), |
318 | | - ('*', "Asterisk"), |
319 | | - ('/', "Slash"), |
320 | | - ('\\', "Backslash"), |
321 | | - ('&', "Ampersand"), |
322 | | - ('+', "Plus Sign"), |
323 | | - ('<', "Less-Than Sign"), |
324 | | - ('=', "Equals Sign"), |
325 | | - ('>', "Greater-Than Sign"), |
| 301 | +// FIXME: the lexer could be used to turn the ASCII version of Unicode homoglyphs into tokens, |
| 302 | +// instead of keeping the substitution token in this table. Ideally, this should be inside |
| 303 | +// `rustc_lexer`. However, we should first remove compound tokens like `<<` from `rustc_lexer`, |
| 304 | +// and then add fancier error recovery to it, as there will be less overall work to do this way. |
| 305 | +const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ |
| 306 | + (' ', "Space", Some(token::Whitespace)), |
| 307 | + ('_', "Underscore", Some(token::Ident(kw::Underscore, false))), |
| 308 | + ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))), |
| 309 | + (',', "Comma", Some(token::Comma)), |
| 310 | + (';', "Semicolon", Some(token::Semi)), |
| 311 | + (':', "Colon", Some(token::Colon)), |
| 312 | + ('!', "Exclamation Mark", Some(token::Not)), |
| 313 | + ('?', "Question Mark", Some(token::Question)), |
| 314 | + ('.', "Period", Some(token::Dot)), |
| 315 | + ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))), |
| 316 | + (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))), |
| 317 | + ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))), |
| 318 | + (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))), |
| 319 | + ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))), |
| 320 | + ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))), |
| 321 | + ('*', "Asterisk", Some(token::BinOp(token::Star))), |
| 322 | + ('/', "Slash", Some(token::BinOp(token::Slash))), |
| 323 | + ('\\', "Backslash", None), |
| 324 | + ('&', "Ampersand", Some(token::BinOp(token::And))), |
| 325 | + ('+', "Plus Sign", Some(token::BinOp(token::Plus))), |
| 326 | + ('<', "Less-Than Sign", Some(token::Lt)), |
| 327 | + ('=', "Equals Sign", Some(token::Eq)), |
| 328 | + ('>', "Greater-Than Sign", Some(token::Gt)), |
| 329 | + // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by |
| 330 | + // spitting the correct token out. |
| 331 | + ('\'', "Single Quote", None), |
| 332 | + ('"', "Quotation Mark", None), |
326 | 333 | ]; |
327 | 334 |
|
328 | 335 | crate fn check_for_substitution<'a>( |
329 | 336 | reader: &StringReader<'a>, |
330 | 337 | pos: BytePos, |
331 | 338 | ch: char, |
332 | 339 | err: &mut DiagnosticBuilder<'a>, |
333 | | -) -> bool { |
| 340 | +) -> Option<token::TokenKind> { |
334 | 341 | let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) { |
335 | 342 | Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char), |
336 | | - None => return false, |
| 343 | + None => return None, |
337 | 344 | }; |
338 | 345 |
|
339 | 346 | let span = Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), NO_EXPANSION); |
340 | 347 |
|
341 | | - let ascii_name = match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) { |
342 | | - Some((_ascii_char, ascii_name)) => ascii_name, |
| 348 | + let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) { |
| 349 | + Some((_ascii_char, ascii_name, token)) => (ascii_name, token), |
343 | 350 | None => { |
344 | 351 | let msg = format!("substitution character not found for '{}'", ch); |
345 | 352 | reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); |
346 | | - return false; |
| 353 | + return None; |
347 | 354 | } |
348 | 355 | }; |
349 | 356 |
|
@@ -371,7 +378,7 @@ crate fn check_for_substitution<'a>( |
371 | 378 | ); |
372 | 379 | err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect); |
373 | 380 | } |
374 | | - true |
| 381 | + token.clone() |
375 | 382 | } |
376 | 383 |
|
377 | 384 | /// Extract string if found at current position with given delimiters |
|
0 commit comments