Skip to content

Print a note if a character literal contains a variation selector #88795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 47 additions & 14 deletions compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use std::iter::once;
use std::ops::Range;

-use rustc_errors::{Applicability, Handler};
+use rustc_errors::{pluralize, Applicability, Handler};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};

Expand Down Expand Up @@ -49,24 +49,57 @@ pub(crate) fn emit_unescape_error(
.emit();
}
EscapeError::MoreThanOneChar => {
-let (prefix, msg) = if mode.is_bytes() {
-("b", "if you meant to write a byte string literal, use double quotes")
-} else {
-("", "if you meant to write a `str` literal, use double quotes")
-};
+use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};

-handler
-.struct_span_err(
-span_with_quotes,
-"character literal may only contain one codepoint",
-)
-.span_suggestion(
+let mut has_help = false;
+let mut handler = handler.struct_span_err(
+span_with_quotes,
+"character literal may only contain one codepoint",
+);
+
+if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
+let escaped_marks =
+lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
+handler.span_note(
+span,
+&format!(
+"this `{}` is followed by the combining mark{} `{}`",
+lit.chars().next().unwrap(),
+pluralize!(escaped_marks.len()),
+escaped_marks.join(""),
+),
+);
+let normalized = lit.nfc().to_string();
+if normalized.chars().count() == 1 {
+has_help = true;
+handler.span_suggestion(
+span,
+&format!(
+"consider using the normalized form `{}` of this character",
+normalized.chars().next().unwrap().escape_default()
+),
+normalized,
+Applicability::MachineApplicable,
+);
+}
+}
+
+if !has_help {
+let (prefix, msg) = if mode.is_bytes() {
+("b", "if you meant to write a byte string literal, use double quotes")
+} else {
+("", "if you meant to write a `str` literal, use double quotes")
+};
+
+handler.span_suggestion(
span_with_quotes,
msg,
format!("{}\"{}\"", prefix, lit),
Applicability::MachineApplicable,
-)
-.emit();
+);
+}
+
+handler.emit();
}
EscapeError::EscapeOnlyChar => {
let (c, char_span) = last_char();
Expand Down
21 changes: 21 additions & 0 deletions src/test/ui/parser/unicode-character-literal.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Regression test for #88684: Improve diagnostics for combining marks
// in character literals.

// run-rustfix

fn main() {
let _spade = "♠️";
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _s = "ṩ̂̊";
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _a = 'Å';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
//~| HELP: consider using the normalized form `\u{c5}` of this character
}
21 changes: 21 additions & 0 deletions src/test/ui/parser/unicode-character-literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Regression test for #88684: Improve diagnostics for combining marks
// in character literals.

// run-rustfix

fn main() {
let _spade = '♠️';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _s = 'ṩ̂̊';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _a = 'Å';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
//~| HELP: consider using the normalized form `\u{c5}` of this character
}
48 changes: 48 additions & 0 deletions src/test/ui/parser/unicode-character-literal.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:7:18
|
LL | let _spade = '♠️';
| ^^^
|
note: this `♠` is followed by the combining mark `\u{fe0f}`
--> $DIR/unicode-character-literal.rs:7:19
|
LL | let _spade = '♠️';
| ^
help: if you meant to write a `str` literal, use double quotes
|
LL | let _spade = "♠️";
| ~~~

error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:12:14
|
LL | let _s = 'ṩ̂̊';
| ^^^
|
note: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
--> $DIR/unicode-character-literal.rs:12:15
|
LL | let _s = 'ṩ̂̊';
| ^
help: if you meant to write a `str` literal, use double quotes
|
LL | let _s = "ṩ̂̊";
| ~~~

error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:17:14
|
LL | let _a = 'Å';
| ^-^
| |
| help: consider using the normalized form `\u{c5}` of this character: `Å`
|
note: this `A` is followed by the combining mark `\u{30a}`
--> $DIR/unicode-character-literal.rs:17:15
|
LL | let _a = 'Å';
| ^

error: aborting due to 3 previous errors