From 55185992d6f4ef3c01aaab545488863d21232d8d Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 12:15:35 +1000
Subject: [PATCH 1/7] Improve shebang handling.

Avoid doing work until it's necessary.

---
 compiler/rustc_parse/src/lexer/mod.rs | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index e9701ec2d7f45..90df8dbd445e4 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -64,14 +64,11 @@ impl<'a> StringReader<'a> {
         let mut spacing = Spacing::Joint;
 
         // Skip `#!` at the start of the file
-        let start_src_index = self.src_index(self.pos);
-        let text: &str = &self.src[start_src_index..self.end_src_index];
-        let is_beginning_of_file = self.pos == self.start_pos;
-        if is_beginning_of_file {
-            if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
-                self.pos = self.pos + BytePos::from_usize(shebang_len);
-                spacing = Spacing::Alone;
-            }
+        if self.pos == self.start_pos
+            && let Some(shebang_len) = rustc_lexer::strip_shebang(self.src)
+        {
+            self.pos = self.pos + BytePos::from_usize(shebang_len);
+            spacing = Spacing::Alone;
         }
 
         // Skip trivial (whitespace & comments) tokens

From bd23d68b4133fdf849544e8cbd866d86b535934d Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 12:50:22 +1000
Subject: [PATCH 2/7] Remove `StringReader::end_src_index`.

It's not needed, always being set to the end of the text.

---
 compiler/rustc_parse/src/lexer/mod.rs | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 90df8dbd445e4..5868036c737b7 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -37,8 +37,7 @@ pub(crate) fn parse_token_trees<'a>(
     start_pos: BytePos,
     override_span: Option<Span>,
 ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
-    StringReader { sess, start_pos, pos: start_pos, end_src_index: src.len(), src, override_span }
-        .into_token_trees()
+    StringReader { sess, start_pos, pos: start_pos, src, override_span }.into_token_trees()
 }
 
 struct StringReader<'a> {
@@ -47,8 +46,6 @@ struct StringReader<'a> {
     start_pos: BytePos,
     /// The absolute offset within the source_map of the current character.
     pos: BytePos,
-    /// Stop reading src at this index.
-    end_src_index: usize,
     /// Source text to tokenize.
     src: &'a str,
     override_span: Option<Span>,
@@ -74,7 +71,7 @@ impl<'a> StringReader<'a> {
         // Skip trivial (whitespace & comments) tokens
         loop {
             let start_src_index = self.src_index(self.pos);
-            let text: &str = &self.src[start_src_index..self.end_src_index];
+            let text: &str = &self.src[start_src_index..];
 
             if text.is_empty() {
                 let span = self.mk_sp(self.pos, self.pos);

From b4fdf648eab9afc797fe5e2d9e30a9660f23d68d Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 13:50:48 +1000
Subject: [PATCH 3/7] Inline `first_token`.

Because it's tiny and hot.

---
 compiler/rustc_lexer/src/lib.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index a41e0374f410a..32260913491f4 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -221,6 +221,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
 }
 
 /// Parses the first token from the provided input string.
+#[inline]
 pub fn first_token(input: &str) -> Token {
     debug_assert!(!input.is_empty());
     Cursor::new(input).advance_token()

From c01a36d5e4b5a02061fbd99cef138567e3e8105b Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 15:07:19 +1000
Subject: [PATCH 4/7] Avoid an unnecessary `return`.

---
 compiler/rustc_parse/src/lexer/tokentrees.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index 0816bc8deb66f..d510e36c60601 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -284,9 +284,9 @@ impl TokenStreamBuilder {
         {
             self.buf.pop();
             self.buf.push(TokenTree::Token(glued, *joint));
-            return;
+        } else {
+            self.buf.push(tree)
         }
-        self.buf.push(tree);
     }
 
     fn into_token_stream(self) -> TokenStream {

From ddf62b5bd40c038f15b0db31c5c35eab6420ed8c Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 15:13:15 +1000
Subject: [PATCH 5/7] Inline `TokenStreamBuilder::push`.

Because it's small and hot.

---
 compiler/rustc_parse/src/lexer/tokentrees.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index d510e36c60601..aa70912dcde4c 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -277,6 +277,7 @@ struct TokenStreamBuilder {
 }
 
 impl TokenStreamBuilder {
+    #[inline(always)]
     fn push(&mut self, tree: TokenTree) {
         if let Some(TokenTree::Token(prev_token, Spacing::Joint)) = self.buf.last()
             && let TokenTree::Token(token, joint) = &tree

From e6b9fccfb12a19a928c238e0bbbd2ddec02885ed Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 14:21:08 +1000
Subject: [PATCH 6/7] Add a size assertion for `Token`.

---
 compiler/rustc_parse/src/lexer/mod.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 5868036c737b7..9245e3a677acc 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -22,6 +22,13 @@ mod unicode_chars;
 
 use unescape_error_reporting::{emit_unescape_error, escaped_char};
 
+// This type is used a lot. Make sure it doesn't unintentionally get bigger.
+//
+// This assertion is in this crate, rather than in `rustc_lexer`, because that
+// crate cannot depend on `rustc_data_structures`.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+rustc_data_structures::static_assert_size!(rustc_lexer::Token, 72);
+
 #[derive(Clone, Debug)]
 pub struct UnmatchedBrace {
     pub expected_delim: Delimiter,

From 99f5c79d64c268e8603c6b00c88abda7319f26e2 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 13:59:30 +1000
Subject: [PATCH 7/7] Shrink `Token`.

From 72 bytes to 12 bytes (on x86-64). There are two parts to this:

- Changing various source code offsets from 64-bit to 32-bit. This is
  not a problem because the rest of rustc also uses 32-bit source code
  offsets. This means `Token` is no longer `Copy`, but this causes no
  problems.

- Removing the `RawStrError` from `LiteralKind`. Raw string literal
  invalidity is now indicated by a `None` value within
  `RawStr`/`RawByteStr`, and the new `validate_raw_str` function can be
  used to re-lex an invalid raw string literal to get the
  `RawStrError`.

There is one very small change in behaviour.
Previously, if a raw string literal matched both the `InvalidStarter` and
`TooManyDelimiters` cases, the latter would override the former. This has
now changed, because `raw_double_quoted_string` now uses `?` and so
returns immediately upon detecting the `InvalidStarter` case. I think
this is a slight improvement, because the earlier-detected error is now
the one reported, and it explains the change in the
`test_too_many_hashes` test.

The commit also removes a couple of comments that refer to #77629 and
say that the size of these types doesn't affect performance. These
comments are wrong, though the performance effect is small.

---
 compiler/rustc_ast/src/util/comments.rs      | 10 ++-
 compiler/rustc_lexer/src/cursor.rs           |  4 +-
 compiler/rustc_lexer/src/lib.rs              | 88 ++++++++++---------
 compiler/rustc_lexer/src/tests.rs            | 50 +++++------
 compiler/rustc_parse/src/lexer/mod.rs        | 52 ++++++-----
 src/librustdoc/html/highlight.rs             |  2 +-
 .../clippy/clippy_lints/src/matches/mod.rs   |  2 +-
 .../src/undocumented_unsafe_blocks.rs        |  2 +-
 .../clippy/clippy_utils/src/hir_utils.rs     |  4 +-
 9 files changed, 111 insertions(+), 103 deletions(-)

diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs
index b4fff0022e295..c96474ccb428a 100644
--- a/compiler/rustc_ast/src/util/comments.rs
+++ b/compiler/rustc_ast/src/util/comments.rs
@@ -194,7 +194,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
             rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
                 if let Some(mut idx) = token_text.find('\n') {
@@ -211,8 +211,10 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
             rustc_lexer::TokenKind::LineComment { doc_style } => {
                 if doc_style.is_none() {
-                    let code_to_the_right =
-                        !matches!(text[pos + token.len..].chars().next(), Some('\r' | '\n'));
+                    let code_to_the_right = !matches!(
+                        text[pos + token.len as usize..].chars().next(),
+                        Some('\r' | '\n')
+                    );
                     let style = match (code_to_the_left, code_to_the_right) {
                         (_, true) => CommentStyle::Mixed,
                         (false, false) => CommentStyle::Isolated,
@@ -246,7 +248,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -61,8 +61,8 @@ impl<'a> Cursor<'a> {
     }
 
     /// Returns amount of already consumed symbols.
-    pub(crate) fn len_consumed(&self) -> usize {
-        self.initial_len - self.chars.as_str().len()
+    pub(crate) fn len_consumed(&self) -> u32 {
+        (self.initial_len - self.chars.as_str().len()) as u32
     }
 
     /// Resets the number of bytes consumed to 0.
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 32260913491f4..6d311af9007b1 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -38,18 +38,17 @@ use std::convert::TryFrom;
 
 #[derive(Debug)]
 pub struct Token {
     pub kind: TokenKind,
-    pub len: usize,
+    pub len: u32,
 }
 
 impl Token {
-    fn new(kind: TokenKind, len: usize) -> Token {
+    fn new(kind: TokenKind, len: u32) -> Token {
         Token { kind, len }
     }
 }
 
 /// Enum representing common lexeme types.
-// perf note: Changing all `usize` to `u32` doesn't change performance. See #77629
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum TokenKind {
     // Multi-char tokens:
     /// "// comment"
@@ -76,7 +75,7 @@ pub enum TokenKind {
     /// tokens.
     UnknownPrefix,
     /// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
-    Literal { kind: LiteralKind, suffix_start: usize },
+    Literal { kind: LiteralKind, suffix_start: u32 },
 
     /// "'a"
     Lifetime { starts_with_number: bool },
 
@@ -160,26 +159,24 @@ pub enum LiteralKind {
     Str { terminated: bool },
     /// "b"abc"", "b"abc"
     ByteStr { terminated: bool },
-    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
-    RawStr { n_hashes: u8, err: Option<RawStrError> },
-    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
-    RawByteStr { n_hashes: u8, err: Option<RawStrError> },
+    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
+    /// an invalid literal.
+    RawStr { n_hashes: Option<u8> },
+    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
+    /// indicates an invalid literal.
+    RawByteStr { n_hashes: Option<u8> },
 }
 
-/// Error produced validating a raw string. Represents cases like:
-/// - `r##~"abcde"##`: `InvalidStarter`
-/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
-/// - Too many `#`s (>255): `TooManyDelimiters`
-// perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum RawStrError {
-    /// Non `#` characters exist between `r` and `"` eg. `r#~"..`
+    /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
     InvalidStarter { bad_char: char },
-    /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
-    /// may have intended to terminate it.
-    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
+    /// The string was not terminated, e.g. `r###"abcde"##`.
+    /// `possible_terminator_offset` is the number of characters after `r` or
+    /// `br` where they may have intended to terminate it.
+    NoTerminator { expected: u32, found: u32, possible_terminator_offset: Option<u32> },
     /// More than 255 `#`s exist.
-    TooManyDelimiters { found: usize },
+    TooManyDelimiters { found: u32 },
 }
 
 /// Base of numeric literal encoding according to its prefix.
@@ -227,6 +224,19 @@ pub fn first_token(input: &str) -> Token {
     Cursor::new(input).advance_token()
 }
 
+/// Validates a raw string literal. Used for getting more information about a
+/// problem with a `RawStr`/`RawByteStr` with a `None` field.
+#[inline]
+pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
+    debug_assert!(!input.is_empty());
+    let mut cursor = Cursor::new(input);
+    // Move past the leading `r` or `br`.
+    for _ in 0..prefix_len {
+        cursor.bump().unwrap();
+    }
+    cursor.raw_double_quoted_string(prefix_len).map(|_| ())
+}
+
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
     let mut cursor = Cursor::new(input);
@@ -316,12 +326,12 @@ impl Cursor<'_> {
             'r' => match (self.first(), self.second()) {
                 ('#', c1) if is_id_start(c1) => self.raw_ident(),
                 ('#', _) | ('"', _) => {
-                    let (n_hashes, err) = self.raw_double_quoted_string(1);
+                    let res = self.raw_double_quoted_string(1);
                     let suffix_start = self.len_consumed();
-                    if err.is_none() {
+                    if res.is_ok() {
                         self.eat_literal_suffix();
                     }
-                    let kind = RawStr { n_hashes, err };
+                    let kind = RawStr { n_hashes: res.ok() };
                     Literal { kind, suffix_start }
                 }
                 _ => self.ident_or_unknown_prefix(),
@@ -351,12 +361,12 @@ impl Cursor<'_> {
                 }
                 ('r', '"') | ('r', '#') => {
                     self.bump();
-                    let (n_hashes, err) = self.raw_double_quoted_string(2);
+                    let res = self.raw_double_quoted_string(2);
                     let suffix_start = self.len_consumed();
-                    if err.is_none() {
+                    if res.is_ok() {
                         self.eat_literal_suffix();
                     }
-                    let kind = RawByteStr { n_hashes, err };
+                    let kind = RawByteStr { n_hashes: res.ok() };
                     Literal { kind, suffix_start }
                 }
                 _ => self.ident_or_unknown_prefix(),
@@ -699,19 +709,18 @@ impl Cursor<'_> {
     }
 
     /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
-    fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u8, Option<RawStrError>) {
+    fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
         // Wrap the actual function to handle the error with too many hashes.
         // This way, it eats the whole raw string.
-        let (n_hashes, err) = self.raw_string_unvalidated(prefix_len);
+        let n_hashes = self.raw_string_unvalidated(prefix_len)?;
         // Only up to 255 `#`s are allowed in raw strings
         match u8::try_from(n_hashes) {
-            Ok(num) => (num, err),
-            // We lie about the number of hashes here :P
-            Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
+            Ok(num) => Ok(num),
+            Err(_) => Err(RawStrError::TooManyDelimiters { found: n_hashes }),
         }
     }
 
-    fn raw_string_unvalidated(&mut self, prefix_len: usize) -> (usize, Option<RawStrError>) {
+    fn raw_string_unvalidated(&mut self, prefix_len: u32) -> Result<u32, RawStrError> {
         debug_assert!(self.prev() == 'r');
         let start_pos = self.len_consumed();
         let mut possible_terminator_offset = None;
@@ -730,7 +739,7 @@ impl Cursor<'_> {
             Some('"') => (),
             c => {
                 let c = c.unwrap_or(EOF_CHAR);
-                return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
+                return Err(RawStrError::InvalidStarter { bad_char: c });
             }
         }
 
@@ -740,14 +749,11 @@ impl Cursor<'_> {
             self.eat_while(|c| c != '"');
 
             if self.is_eof() {
-                return (
-                    n_start_hashes,
-                    Some(RawStrError::NoTerminator {
-                        expected: n_start_hashes,
-                        found: max_hashes,
-                        possible_terminator_offset,
-                    }),
-                );
+                return Err(RawStrError::NoTerminator {
+                    expected: n_start_hashes,
+                    found: max_hashes,
+                    possible_terminator_offset,
+                });
             }
 
             // Eat closing double quote.
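A minimal sketch, not part of the patch, of the error-handling split these hunks implement: the hot path stores only `n_hashes: res.ok()` in the token, and the precise `RawStrError` is recomputed on the cold path by re-lexing, which is what `report_raw_str_error` in `rustc_parse` does later in this series. The helper name `raw_str_error` is hypothetical, and `prefix_len` is fixed at 1 for an `r"..."` literal (it would be 2 for `br"..."`):

    use rustc_lexer::{first_token, validate_raw_str, LiteralKind, RawStrError, TokenKind};

    fn raw_str_error(src: &str) -> Option<RawStrError> {
        match first_token(src).kind {
            // `n_hashes: None` is the cheap invalidity flag stored in the token.
            TokenKind::Literal { kind: LiteralKind::RawStr { n_hashes: None }, .. } => {
                // Cold path: re-lex the literal to reconstruct the exact error.
                validate_raw_str(src, 1).err()
            }
            _ => None,
        }
    }
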
@@ -765,7 +771,7 @@ impl Cursor<'_> {
         }
 
         if n_end_hashes == n_start_hashes {
-            return (n_start_hashes, None);
+            return Ok(n_start_hashes);
         } else if n_end_hashes > max_hashes {
             // Keep track of possible terminators to give a hint about
             // where there might be a missing terminator
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index 07daee06f0f86..e4c1787f2ccef 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -2,42 +2,39 @@ use super::*;
 
 use expect_test::{expect, Expect};
 
-fn check_raw_str(s: &str, expected_hashes: u8, expected_err: Option<RawStrError>) {
+fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
     let s = &format!("r{}", s);
     let mut cursor = Cursor::new(s);
     cursor.bump();
-    let (n_hashes, err) = cursor.raw_double_quoted_string(0);
-    assert_eq!(n_hashes, expected_hashes);
-    assert_eq!(err, expected_err);
+    let res = cursor.raw_double_quoted_string(0);
+    assert_eq!(res, expected);
 }
 
 #[test]
 fn test_naked_raw_str() {
-    check_raw_str(r#""abc""#, 0, None);
+    check_raw_str(r#""abc""#, Ok(0));
 }
 
 #[test]
 fn test_raw_no_start() {
-    check_raw_str(r##""abc"#"##, 0, None);
+    check_raw_str(r##""abc"#"##, Ok(0));
 }
 
 #[test]
 fn test_too_many_terminators() {
     // this error is handled in the parser later
-    check_raw_str(r###"#"abc"##"###, 1, None);
+    check_raw_str(r###"#"abc"##"###, Ok(1));
 }
 
 #[test]
 fn test_unterminated() {
     check_raw_str(
         r#"#"abc"#,
-        1,
-        Some(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
     );
     check_raw_str(
         r###"##"abc"#"###,
-        2,
-        Some(RawStrError::NoTerminator {
+        Err(RawStrError::NoTerminator {
             expected: 2,
             found: 1,
             possible_terminator_offset: Some(7),
@@ -46,14 +43,13 @@ fn test_unterminated() {
     // We're looking for "# not just any #
     check_raw_str(
         r###"##"abc#"###,
-        2,
-        Some(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
     )
 }
 
 #[test]
 fn test_invalid_start() {
-    check_raw_str(r##"#~"abc"#"##, 1, Some(RawStrError::InvalidStarter { bad_char: '~' }));
+    check_raw_str(r##"#~"abc"#"##, Err(RawStrError::InvalidStarter { bad_char: '~' }));
 }
 
 #[test]
@@ -61,26 +57,24 @@ fn test_unterminated_no_pound() {
     // https://github.com/rust-lang/rust/issues/70677
     check_raw_str(
         r#"""#,
-        0,
-        Some(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
     );
 }
 
 #[test]
 fn test_too_many_hashes() {
     let max_count = u8::MAX;
-    let mut hashes: String = "#".repeat(max_count.into());
+    let hashes1 = "#".repeat(max_count as usize);
+    let hashes2 = "#".repeat(max_count as usize + 1);
+    let middle = "\"abc\"";
+    let s1 = [&hashes1, middle, &hashes1].join("");
+    let s2 = [&hashes2, middle, &hashes2].join("");
 
-    // Valid number of hashes (255 = 2^8 - 1 = u8::MAX), but invalid string.
-    check_raw_str(&hashes, max_count, Some(RawStrError::InvalidStarter { bad_char: '\u{0}' }));
+    // Valid number of hashes (255 = 2^8 - 1 = u8::MAX).
+    check_raw_str(&s1, Ok(255));
 
     // One more hash sign (256 = 2^8) becomes too many.
-    hashes.push('#');
-    check_raw_str(
-        &hashes,
-        0,
-        Some(RawStrError::TooManyDelimiters { found: usize::from(max_count) + 1 }),
-    );
+    check_raw_str(&s2, Err(RawStrError::TooManyDelimiters { found: u32::from(max_count) + 1 }));
 }
 
 #[test]
@@ -251,7 +245,7 @@ fn raw_string() {
     check_lexing(
         "r###\"\"#a\\b\x00c\"\"###",
         expect![[r#"
-            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 17 }, len: 17 }
+            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 }
         "#]],
     )
 }
 
@@ -295,9 +289,9 @@ br###"raw"###suffix
             Token { kind: Whitespace, len: 1 }
             Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
             Token { kind: Whitespace, len: 1 }
-            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 12 }, len: 18 }
+            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 12 }, len: 18 }
             Token { kind: Whitespace, len: 1 }
-            Token { kind: Literal { kind: RawByteStr { n_hashes: 3, err: None }, suffix_start: 13 }, len: 19 }
+            Token { kind: Literal { kind: RawByteStr { n_hashes: Some(3) }, suffix_start: 13 }, len: 19 }
             Token { kind: Whitespace, len: 1 }
         "#]],
     )
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 9245e3a677acc..848e142e59ce9 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -27,7 +27,7 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
 // This assertion is in this crate, rather than in `rustc_lexer`, because that
 // crate cannot depend on `rustc_data_structures`.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(rustc_lexer::Token, 72);
+rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
 
 #[derive(Clone, Debug)]
 pub struct UnmatchedBrace {
@@ -88,7 +88,7 @@ impl<'a> StringReader<'a> {
             let token = rustc_lexer::first_token(text);
 
             let start = self.pos;
-            self.pos = self.pos + BytePos::from_usize(token.len);
+            self.pos = self.pos + BytePos(token.len);
 
             debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
 
@@ -240,7 +240,7 @@ impl<'a> StringReader<'a> {
                 token::Ident(sym, false)
             }
             rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
-                let suffix_start = start + BytePos(suffix_start as u32);
+                let suffix_start = start + BytePos(suffix_start);
                 let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
                 let suffix = if suffix_start < self.pos {
                     let string = self.str_from(suffix_start);
@@ -405,15 +405,21 @@ impl<'a> StringReader<'a> {
                 }
                 (token::ByteStr, Mode::ByteStr, 2, 1) // b" "
             }
-            rustc_lexer::LiteralKind::RawStr { n_hashes, err } => {
-                self.report_raw_str_error(start, err);
-                let n = u32::from(n_hashes);
-                (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "##
+            rustc_lexer::LiteralKind::RawStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "##
+                } else {
+                    self.report_raw_str_error(start, 1);
+                }
             }
-            rustc_lexer::LiteralKind::RawByteStr { n_hashes, err } => {
-                self.report_raw_str_error(start, err);
-                let n = u32::from(n_hashes);
-                (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "##
+            rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "##
+                } else {
+                    self.report_raw_str_error(start, 2);
+                }
             }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 return if empty_int {
@@ -484,17 +490,17 @@ impl<'a> StringReader<'a> {
         &self.src[self.src_index(start)..self.src_index(end)]
     }
 
-    fn report_raw_str_error(&self, start: BytePos, opt_err: Option<RawStrError>) {
-        match opt_err {
-            Some(RawStrError::InvalidStarter { bad_char }) => {
+    fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! {
+        match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) {
+            Err(RawStrError::InvalidStarter { bad_char }) => {
                 self.report_non_started_raw_string(start, bad_char)
             }
-            Some(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
+            Err(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
                 .report_unterminated_raw_string(start, expected, possible_terminator_offset, found),
-            Some(RawStrError::TooManyDelimiters { found }) => {
+            Err(RawStrError::TooManyDelimiters { found }) => {
                 self.report_too_many_hashes(start, found)
             }
-            None => (),
+            Ok(()) => panic!("no error found for supposedly invalid raw string literal"),
         }
     }
 
@@ -511,9 +517,9 @@ impl<'a> StringReader<'a> {
     fn report_unterminated_raw_string(
         &self,
         start: BytePos,
-        n_hashes: usize,
-        possible_offset: Option<usize>,
-        found_terminators: usize,
+        n_hashes: u32,
+        possible_offset: Option<u32>,
+        found_terminators: u32,
     ) -> ! {
         let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code(
             self.mk_sp(start, start),
@@ -526,7 +532,7 @@ impl<'a> StringReader<'a> {
         if n_hashes > 0 {
             err.note(&format!(
                 "this raw string should be terminated with `\"{}`",
-                "#".repeat(n_hashes)
+                "#".repeat(n_hashes as usize)
             ));
         }
 
@@ -537,7 +543,7 @@ impl<'a> StringReader<'a> {
             err.span_suggestion(
                 span,
                 "consider terminating the string here",
-                "#".repeat(n_hashes),
+                "#".repeat(n_hashes as usize),
                 Applicability::MaybeIncorrect,
             );
         }
 
@@ -638,7 +644,7 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! {
+    fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
         self.fatal_span_(
             start,
             self.pos,
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index d2ef89078bf6d..05547ea1515c3 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -213,7 +213,7 @@ impl<'a> Iterator for TokenIter<'a> {
             return None;
         }
         let token = rustc_lexer::first_token(self.src);
-        let (text, rest) = self.src.split_at(token.len);
+        let (text, rest) = self.src.split_at(token.len as usize);
         self.src = rest;
         Some((token.kind, text))
     }
diff --git a/src/tools/clippy/clippy_lints/src/matches/mod.rs b/src/tools/clippy/clippy_lints/src/matches/mod.rs
index b638f27160282..e9e13aece18f6 100644
--- a/src/tools/clippy/clippy_lints/src/matches/mod.rs
+++ b/src/tools/clippy/clippy_lints/src/matches/mod.rs
@@ -1112,7 +1112,7 @@ fn span_contains_cfg(cx: &LateContext<'_>, s: Span) -> bool {
     let mut pos = 0usize;
     let mut iter = tokenize(&snip).map(|t| {
         let start = pos;
-        pos += t.len;
+        pos += t.len as usize;
         (t.kind, start..pos)
     });
 
diff --git a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
index 04f16fd2161c5..d2e675a783eaa 100644
--- a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
+++ b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
@@ -345,7 +345,7 @@ fn text_has_safety_comment(src: &str, line_starts: &[BytePos], offset: usize) ->
     if line.starts_with("/*") {
         let src = src[line_start..line_starts.last().unwrap().to_usize() - offset].trim_start();
         let mut tokens = tokenize(src);
-        return src[..tokens.next().unwrap().len]
+        return src[..tokens.next().unwrap().len as usize]
             .to_ascii_uppercase()
             .contains("SAFETY:")
             && tokens.all(|t| t.kind == TokenKind::Whitespace);
diff --git a/src/tools/clippy/clippy_utils/src/hir_utils.rs b/src/tools/clippy/clippy_utils/src/hir_utils.rs
index eaf260ddfb832..1834e2a2de872 100644
--- a/src/tools/clippy/clippy_utils/src/hir_utils.rs
+++ b/src/tools/clippy/clippy_utils/src/hir_utils.rs
@@ -141,7 +141,7 @@ impl HirEqInterExpr<'_, '_, '_> {
         let mut left_pos = 0;
         let left = tokenize(&left)
             .map(|t| {
-                let end = left_pos + t.len;
+                let end = left_pos + t.len as usize;
                 let s = &left[left_pos..end];
                 left_pos = end;
                 (t, s)
@@ -156,7 +156,7 @@ impl HirEqInterExpr<'_, '_, '_> {
         let mut right_pos = 0;
         let right = tokenize(&right)
             .map(|t| {
-                let end = right_pos + t.len;
+                let end = right_pos + t.len as usize;
                 let s = &right[right_pos..end];
                 right_pos = end;
                 (t, s)
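A closing sketch, not part of the patch: with `Token::len` now a `u32`, the recurring downstream pattern, visible in the rustdoc and clippy hunks above, is a single `as usize` conversion at each slicing site. The names `token_texts` and `source` are illustrative:

    use rustc_lexer::tokenize;

    fn token_texts(source: &str) -> Vec<&str> {
        let mut pos = 0usize;
        tokenize(source)
            .map(|t| {
                let end = pos + t.len as usize; // convert u32 -> usize once, at the use site
                let text = &source[pos..end];
                pos = end;
                text
            })
            .collect()
    }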