Skip to content

Commit 2d9f0e2

Browse files
committed
Optimize bidi character detection.
1 parent 473eaa4 commit 2d9f0e2

File tree

2 files changed

+40
-6
lines changed

2 files changed

+40
-6
lines changed

compiler/rustc_parse/src/lexer/mod.rs

+39-6
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,45 @@ impl<'a> StringReader<'a> {
137137
// The opening delimiter (2 bytes long) is not included in the comment text.
138138
let content_start = start + BytePos(2);
139139
let content = self.str_from(content_start);
140-
let span = self.mk_sp(start, self.pos);
141-
const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
142-
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
143-
'\u{202C}', '\u{2069}',
144-
];
145-
if content.contains(UNICODE_TEXT_FLOW_CHARS) {
140+
141+
// Char - UTF-8
142+
// U+202A - E2 80 AA
143+
// U+202B - E2 80 AB
144+
// U+202C - E2 80 AC
145+
// U+202D - E2 80 AD
146+
// U+202E - E2 80 AE
147+
// U+2066 - E2 81 A6
148+
// U+2067 - E2 81 A7
149+
// U+2068 - E2 81 A8
150+
// U+2069 - E2 81 A9
151+
let mut bytes = content.as_bytes();
152+
let contains_flow_control_chars = loop {
153+
match core::slice::memchr::memchr(0xE2, &bytes) {
154+
Some(idx) => {
155+
// `bytes` is valid UTF-8, so 0xE2 can only be a lead byte and is always followed by two continuation bytes.
156+
match bytes[idx + 1] {
157+
0x80 => {
158+
if (0xAA..=0xAE).contains(&bytes[idx + 2]) {
159+
break true;
160+
}
161+
}
162+
0x81 => {
163+
if (0xA6..=0xA9).contains(&bytes[idx + 2]) {
164+
break true;
165+
}
166+
}
167+
_ => {}
168+
}
169+
bytes = &bytes[idx + 3..];
170+
}
171+
None => {
172+
break false;
173+
}
174+
}
175+
};
176+
177+
if contains_flow_control_chars {
178+
let span = self.mk_sp(start, self.pos);
146179
self.sess.buffer_lint_with_diagnostic(
147180
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
148181
span,

compiler/rustc_parse/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#![feature(crate_visibility_modifier)]
55
#![feature(if_let_guard)]
66
#![feature(box_patterns)]
7+
#![feature(slice_internals)]
78
#![recursion_limit = "256"]
89

910
#[macro_use]

0 commit comments

Comments
 (0)