From 55185992d6f4ef3c01aaab545488863d21232d8d Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 12:15:35 +1000
Subject: [PATCH 1/7] Improve shebang handling.

Avoid doing work until it's necessary.

---
 compiler/rustc_parse/src/lexer/mod.rs | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index e9701ec2d7f45..90df8dbd445e4 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -64,14 +64,11 @@ impl<'a> StringReader<'a> {
         let mut spacing = Spacing::Joint;
 
         // Skip `#!` at the start of the file
-        let start_src_index = self.src_index(self.pos);
-        let text: &str = &self.src[start_src_index..self.end_src_index];
-        let is_beginning_of_file = self.pos == self.start_pos;
-        if is_beginning_of_file {
-            if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
-                self.pos = self.pos + BytePos::from_usize(shebang_len);
-                spacing = Spacing::Alone;
-            }
+        if self.pos == self.start_pos
+            && let Some(shebang_len) = rustc_lexer::strip_shebang(self.src)
+        {
+            self.pos = self.pos + BytePos::from_usize(shebang_len);
+            spacing = Spacing::Alone;
         }
 
         // Skip trivial (whitespace & comments) tokens

From bd23d68b4133fdf849544e8cbd866d86b535934d Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 12:50:22 +1000
Subject: [PATCH 2/7] Remove `StringReader::end_src_index`.

It's not needed, always being set to the end of the text.

---
 compiler/rustc_parse/src/lexer/mod.rs | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 90df8dbd445e4..5868036c737b7 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -37,8 +37,7 @@ pub(crate) fn parse_token_trees<'a>(
     start_pos: BytePos,
     override_span: Option<Span>,
 ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
-    StringReader { sess, start_pos, pos: start_pos, end_src_index: src.len(), src, override_span }
-        .into_token_trees()
+    StringReader { sess, start_pos, pos: start_pos, src, override_span }.into_token_trees()
 }
 
 struct StringReader<'a> {
@@ -47,8 +46,6 @@ struct StringReader<'a> {
     start_pos: BytePos,
     /// The absolute offset within the source_map of the current character.
     pos: BytePos,
-    /// Stop reading src at this index.
-    end_src_index: usize,
     /// Source text to tokenize.
     src: &'a str,
     override_span: Option<Span>,
@@ -74,7 +71,7 @@ impl<'a> StringReader<'a> {
         // Skip trivial (whitespace & comments) tokens
         loop {
             let start_src_index = self.src_index(self.pos);
-            let text: &str = &self.src[start_src_index..self.end_src_index];
+            let text: &str = &self.src[start_src_index..];
 
             if text.is_empty() {
                 let span = self.mk_sp(self.pos, self.pos);

From b4fdf648eab9afc797fe5e2d9e30a9660f23d68d Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 13:50:48 +1000
Subject: [PATCH 3/7] Inline `first_token`.

Because it's tiny and hot.

---
 compiler/rustc_lexer/src/lib.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index a41e0374f410a..32260913491f4 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -221,6 +221,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
 }
 
 /// Parses the first token from the provided input string.
+#[inline]
 pub fn first_token(input: &str) -> Token {
     debug_assert!(!input.is_empty());
     Cursor::new(input).advance_token()

From c01a36d5e4b5a02061fbd99cef138567e3e8105b Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 15:07:19 +1000
Subject: [PATCH 4/7] Avoid an unnecessary `return`.

---
 compiler/rustc_parse/src/lexer/tokentrees.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index 0816bc8deb66f..d510e36c60601 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -284,9 +284,9 @@ impl TokenStreamBuilder {
         {
             self.buf.pop();
             self.buf.push(TokenTree::Token(glued, *joint));
-            return;
+        } else {
+            self.buf.push(tree)
         }
-        self.buf.push(tree);
     }
 
     fn into_token_stream(self) -> TokenStream {

From ddf62b5bd40c038f15b0db31c5c35eab6420ed8c Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 15:13:15 +1000
Subject: [PATCH 5/7] Inline `TokenStreamBuilder::push`.

Because it's small and hot.

---
 compiler/rustc_parse/src/lexer/tokentrees.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index d510e36c60601..aa70912dcde4c 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -277,6 +277,7 @@ struct TokenStreamBuilder {
 }
 
 impl TokenStreamBuilder {
+    #[inline(always)]
     fn push(&mut self, tree: TokenTree) {
         if let Some(TokenTree::Token(prev_token, Spacing::Joint)) = self.buf.last()
             && let TokenTree::Token(token, joint) = &tree

From e6b9fccfb12a19a928c238e0bbbd2ddec02885ed Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 14:21:08 +1000
Subject: [PATCH 6/7] Add a size assertion for `Token`.

---
 compiler/rustc_parse/src/lexer/mod.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 5868036c737b7..9245e3a677acc 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -22,6 +22,13 @@ mod unicode_chars;
 
 use unescape_error_reporting::{emit_unescape_error, escaped_char};
 
+// This type is used a lot. Make sure it doesn't unintentionally get bigger.
+//
+// This assertion is in this crate, rather than in `rustc_lexer`, because that
+// crate cannot depend on `rustc_data_structures`.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+rustc_data_structures::static_assert_size!(rustc_lexer::Token, 72);
+
 #[derive(Clone, Debug)]
 pub struct UnmatchedBrace {
     pub expected_delim: Delimiter,

From 99f5c79d64c268e8603c6b00c88abda7319f26e2 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 27 Jul 2022 13:59:30 +1000
Subject: [PATCH 7/7] Shrink `Token`.

From 72 bytes to 12 bytes (on x86-64). There are two parts to this:

- Changing various source code offsets from 64-bit to 32-bit. This is
  not a problem because the rest of rustc also uses 32-bit source code
  offsets. This means `Token` is no longer `Copy`, but this causes no
  problems.

- Removing the `RawStrError` from `LiteralKind`. Raw string literal
  invalidity is now indicated by a `None` value within
  `RawStr`/`RawByteStr`, and the new `validate_raw_str` function can be
  used to re-lex an invalid raw string literal to get the
  `RawStrError`.

There is one very small change in behaviour.
Previously, if a raw string literal matched both the `InvalidStarter` and
`TooManyDelimiters` cases, the latter would override the former. This has
now changed, because `raw_double_quoted_string` now uses `?` and so
returns immediately upon detecting the `InvalidStarter` case. I think
this is a slight improvement, because the earlier-detected error is now
the one reported, and it explains the change in the
`test_too_many_hashes` test.

The commit also removes a couple of comments that refer to #77629 and
say that the size of these types doesn't affect performance. These
comments are wrong, though the performance effect is small.

---
 compiler/rustc_ast/src/util/comments.rs      | 10 ++-
 compiler/rustc_lexer/src/cursor.rs           |  4 +-
 compiler/rustc_lexer/src/lib.rs              | 88 ++++++++++---------
 compiler/rustc_lexer/src/tests.rs            | 50 +++++------
 compiler/rustc_parse/src/lexer/mod.rs        | 52 ++++++-----
 src/librustdoc/html/highlight.rs             |  2 +-
 .../clippy/clippy_lints/src/matches/mod.rs   |  2 +-
 .../src/undocumented_unsafe_blocks.rs        |  2 +-
 .../clippy/clippy_utils/src/hir_utils.rs     |  4 +-
 9 files changed, 111 insertions(+), 103 deletions(-)

diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs
index b4fff0022e295..c96474ccb428a 100644
--- a/compiler/rustc_ast/src/util/comments.rs
+++ b/compiler/rustc_ast/src/util/comments.rs
@@ -194,7 +194,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
             rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
                 if let Some(mut idx) = token_text.find('\n') {
@@ -211,8 +211,10 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
             rustc_lexer::TokenKind::LineComment { doc_style } => {
                 if doc_style.is_none() {
-                    let code_to_the_right =
-                        !matches!(text[pos + token.len..].chars().next(), Some('\r' | '\n'));
+                    let code_to_the_right = !matches!(
+                        text[pos + token.len as usize..].chars().next(),
+                        Some('\r' | '\n')
+                    );
                     let style = match (code_to_the_left, code_to_the_right) {
                         (_, true) => CommentStyle::Mixed,
                         (false, false) => CommentStyle::Isolated,
@@ -246,7 +248,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -61,8 +61,8 @@ impl<'a> Cursor<'a> {
     }
 
     /// Returns amount of already consumed symbols.
-    pub(crate) fn len_consumed(&self) -> usize {
-        self.initial_len - self.chars.as_str().len()
+    pub(crate) fn len_consumed(&self) -> u32 {
+        (self.initial_len - self.chars.as_str().len()) as u32
     }
 
     /// Resets the number of bytes consumed to 0.
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 32260913491f4..6d311af9007b1 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -38,18 +38,17 @@ use std::convert::TryFrom;
 
 #[derive(Debug)]
 pub struct Token {
     pub kind: TokenKind,
-    pub len: usize,
+    pub len: u32,
 }
 
 impl Token {
-    fn new(kind: TokenKind, len: usize) -> Token {
+    fn new(kind: TokenKind, len: u32) -> Token {
         Token { kind, len }
     }
 }
 
 /// Enum representing common lexeme types.
-// perf note: Changing all `usize` to `u32` doesn't change performance. See #77629
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum TokenKind {
     // Multi-char tokens:
     /// "// comment"
@@ -76,7 +75,7 @@ pub enum TokenKind {
     /// tokens.
     UnknownPrefix,
     /// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
-    Literal { kind: LiteralKind, suffix_start: usize },
+    Literal { kind: LiteralKind, suffix_start: u32 },
 
     /// "'a"
     Lifetime { starts_with_number: bool },
 
@@ -160,26 +159,24 @@ pub enum LiteralKind {
     Str { terminated: bool },
     /// "b"abc"", "b"abc"
     ByteStr { terminated: bool },
-    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
-    RawStr { n_hashes: u8, err: Option<RawStrError> },
-    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
-    RawByteStr { n_hashes: u8, err: Option<RawStrError> },
+    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
+    /// an invalid literal.
+    RawStr { n_hashes: Option<u8> },
+    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
+    /// indicates an invalid literal.
+    RawByteStr { n_hashes: Option<u8> },
 }
 
-/// Error produced validating a raw string. Represents cases like:
-/// - `r##~"abcde"##`: `InvalidStarter`
-/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
-/// - Too many `#`s (>255): `TooManyDelimiters`
-// perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum RawStrError {
-    /// Non `#` characters exist between `r` and `"` eg. `r#~"..`
+    /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
     InvalidStarter { bad_char: char },
-    /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
-    /// may have intended to terminate it.
-    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
+    /// The string was not terminated, e.g. `r###"abcde"##`.
+    /// `possible_terminator_offset` is the number of characters after `r` or
+    /// `br` where they may have intended to terminate it.
+    NoTerminator { expected: u32, found: u32, possible_terminator_offset: Option<u32> },
     /// More than 255 `#`s exist.
-    TooManyDelimiters { found: usize },
+    TooManyDelimiters { found: u32 },
 }
 
 /// Base of numeric literal encoding according to its prefix.
@@ -227,6 +224,19 @@ pub fn first_token(input: &str) -> Token {
     Cursor::new(input).advance_token()
 }
 
+/// Validates a raw string literal. Used for getting more information about a
+/// problem with a `RawStr`/`RawByteStr` with a `None` field.
+#[inline]
+pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
+    debug_assert!(!input.is_empty());
+    let mut cursor = Cursor::new(input);
+    // Move past the leading `r` or `br`.
+    for _ in 0..prefix_len {
+        cursor.bump().unwrap();
+    }
+    cursor.raw_double_quoted_string(prefix_len).map(|_| ())
+}
+
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
     let mut cursor = Cursor::new(input);
@@ -316,12 +326,12 @@ impl Cursor<'_> {
             'r' => match (self.first(), self.second()) {
                 ('#', c1) if is_id_start(c1) => self.raw_ident(),
                 ('#', _) | ('"', _) => {
-                    let (n_hashes, err) = self.raw_double_quoted_string(1);
+                    let res = self.raw_double_quoted_string(1);
                     let suffix_start = self.len_consumed();
-                    if err.is_none() {
+                    if res.is_ok() {
                         self.eat_literal_suffix();
                     }
-                    let kind = RawStr { n_hashes, err };
+                    let kind = RawStr { n_hashes: res.ok() };
                     Literal { kind, suffix_start }
                 }
                 _ => self.ident_or_unknown_prefix(),
@@ -351,12 +361,12 @@ impl Cursor<'_> {
                 }
                 ('r', '"') | ('r', '#') => {
                     self.bump();
-                    let (n_hashes, err) = self.raw_double_quoted_string(2);
+                    let res = self.raw_double_quoted_string(2);
                     let suffix_start = self.len_consumed();
-                    if err.is_none() {
+                    if res.is_ok() {
                         self.eat_literal_suffix();
                     }
-                    let kind = RawByteStr { n_hashes, err };
+                    let kind = RawByteStr { n_hashes: res.ok() };
                     Literal { kind, suffix_start }
                 }
                 _ => self.ident_or_unknown_prefix(),
@@ -699,19 +709,18 @@ impl Cursor<'_> {
     }
 
     /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
-    fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u8, Option<RawStrError>) {
+    fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
         // Wrap the actual function to handle the error with too many hashes.
         // This way, it eats the whole raw string.
-        let (n_hashes, err) = self.raw_string_unvalidated(prefix_len);
+        let n_hashes = self.raw_string_unvalidated(prefix_len)?;
         // Only up to 255 `#`s are allowed in raw strings
         match u8::try_from(n_hashes) {
-            Ok(num) => (num, err),
-            // We lie about the number of hashes here :P
-            Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
+            Ok(num) => Ok(num),
+            Err(_) => Err(RawStrError::TooManyDelimiters { found: n_hashes }),
         }
     }
 
-    fn raw_string_unvalidated(&mut self, prefix_len: usize) -> (usize, Option<RawStrError>) {
+    fn raw_string_unvalidated(&mut self, prefix_len: u32) -> Result<u32, RawStrError> {
         debug_assert!(self.prev() == 'r');
         let start_pos = self.len_consumed();
         let mut possible_terminator_offset = None;
@@ -730,7 +739,7 @@ impl Cursor<'_> {
             Some('"') => (),
             c => {
                 let c = c.unwrap_or(EOF_CHAR);
-                return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
+                return Err(RawStrError::InvalidStarter { bad_char: c });
             }
         }
 
@@ -740,14 +749,11 @@ impl Cursor<'_> {
             self.eat_while(|c| c != '"');
 
             if self.is_eof() {
-                return (
-                    n_start_hashes,
-                    Some(RawStrError::NoTerminator {
-                        expected: n_start_hashes,
-                        found: max_hashes,
-                        possible_terminator_offset,
-                    }),
-                );
+                return Err(RawStrError::NoTerminator {
+                    expected: n_start_hashes,
+                    found: max_hashes,
+                    possible_terminator_offset,
+                });
             }
 
             // Eat closing double quote.
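A minimal sketch, not part of the patch, of the error-handling split these hunks implement: the hot path stores only `n_hashes: res.ok()` in the token, and the precise `RawStrError` is recomputed on the cold path by re-lexing, which is what `report_raw_str_error` in `rustc_parse` does later in this series. The helper name `raw_str_error` is hypothetical, and `prefix_len` is fixed at 1 for an `r"..."` literal (it would be 2 for `br"..."`):

    use rustc_lexer::{first_token, validate_raw_str, LiteralKind, RawStrError, TokenKind};

    fn raw_str_error(src: &str) -> Option<RawStrError> {
        match first_token(src).kind {
            // `n_hashes: None` is the cheap invalidity flag stored in the token.
            TokenKind::Literal { kind: LiteralKind::RawStr { n_hashes: None }, .. } => {
                // Cold path: re-lex the literal to reconstruct the exact error.
                validate_raw_str(src, 1).err()
            }
            _ => None,
        }
    }
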
@@ -765,7 +771,7 @@ impl Cursor<'_> {
         }
 
         if n_end_hashes == n_start_hashes {
-            return (n_start_hashes, None);
+            return Ok(n_start_hashes);
         } else if n_end_hashes > max_hashes {
             // Keep track of possible terminators to give a hint about
             // where there might be a missing terminator
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index 07daee06f0f86..e4c1787f2ccef 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -2,42 +2,39 @@ use super::*;
 
 use expect_test::{expect, Expect};
 
-fn check_raw_str(s: &str, expected_hashes: u8, expected_err: Option<RawStrError>) {
+fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
     let s = &format!("r{}", s);
     let mut cursor = Cursor::new(s);
     cursor.bump();
-    let (n_hashes, err) = cursor.raw_double_quoted_string(0);
-    assert_eq!(n_hashes, expected_hashes);
-    assert_eq!(err, expected_err);
+    let res = cursor.raw_double_quoted_string(0);
+    assert_eq!(res, expected);
 }
 
 #[test]
 fn test_naked_raw_str() {
-    check_raw_str(r#""abc""#, 0, None);
+    check_raw_str(r#""abc""#, Ok(0));
 }
 
 #[test]
 fn test_raw_no_start() {
-    check_raw_str(r##""abc"#"##, 0, None);
+    check_raw_str(r##""abc"#"##, Ok(0));
 }
 
 #[test]
 fn test_too_many_terminators() {
     // this error is handled in the parser later
-    check_raw_str(r###"#"abc"##"###, 1, None);
+    check_raw_str(r###"#"abc"##"###, Ok(1));
 }
 
 #[test]
 fn test_unterminated() {
     check_raw_str(
         r#"#"abc"#,
-        1,
-        Some(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
     );
     check_raw_str(
         r###"##"abc"#"###,
-        2,
-        Some(RawStrError::NoTerminator {
+        Err(RawStrError::NoTerminator {
             expected: 2,
             found: 1,
             possible_terminator_offset: Some(7),
@@ -46,14 +43,13 @@ fn test_unterminated() {
     // We're looking for "# not just any #
     check_raw_str(
         r###"##"abc#"###,
-        2,
-        Some(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
     )
 }
 
 #[test]
 fn test_invalid_start() {
-    check_raw_str(r##"#~"abc"#"##, 1, Some(RawStrError::InvalidStarter { bad_char: '~' }));
+    check_raw_str(r##"#~"abc"#"##, Err(RawStrError::InvalidStarter { bad_char: '~' }));
 }
 
 #[test]
@@ -61,26 +57,24 @@ fn test_unterminated_no_pound() {
     // https://github.com/rust-lang/rust/issues/70677
     check_raw_str(
         r#"""#,
-        0,
-        Some(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
     );
 }
 
 #[test]
 fn test_too_many_hashes() {
     let max_count = u8::MAX;
-    let mut hashes: String = "#".repeat(max_count.into());
+    let hashes1 = "#".repeat(max_count as usize);
+    let hashes2 = "#".repeat(max_count as usize + 1);
+    let middle = "\"abc\"";
+    let s1 = [&hashes1, middle, &hashes1].join("");
+    let s2 = [&hashes2, middle, &hashes2].join("");
 
-    // Valid number of hashes (255 = 2^8 - 1 = u8::MAX), but invalid string.
-    check_raw_str(&hashes, max_count, Some(RawStrError::InvalidStarter { bad_char: '\u{0}' }));
+    // Valid number of hashes (255 = 2^8 - 1 = u8::MAX).
+    check_raw_str(&s1, Ok(255));
 
     // One more hash sign (256 = 2^8) becomes too many.
-    hashes.push('#');
-    check_raw_str(
-        &hashes,
-        0,
-        Some(RawStrError::TooManyDelimiters { found: usize::from(max_count) + 1 }),
-    );
+    check_raw_str(&s2, Err(RawStrError::TooManyDelimiters { found: u32::from(max_count) + 1 }));
 }
 
 #[test]
@@ -251,7 +245,7 @@ fn raw_string() {
     check_lexing(
         "r###\"\"#a\\b\x00c\"\"###",
         expect![[r#"
-            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 17 }, len: 17 }
+            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 }
         "#]],
     )
 }
 
@@ -295,9 +289,9 @@ br###"raw"###suffix
             Token { kind: Whitespace, len: 1 }
             Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
             Token { kind: Whitespace, len: 1 }
-            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 12 }, len: 18 }
+            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 12 }, len: 18 }
             Token { kind: Whitespace, len: 1 }
-            Token { kind: Literal { kind: RawByteStr { n_hashes: 3, err: None }, suffix_start: 13 }, len: 19 }
+            Token { kind: Literal { kind: RawByteStr { n_hashes: Some(3) }, suffix_start: 13 }, len: 19 }
             Token { kind: Whitespace, len: 1 }
         "#]],
     )
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 9245e3a677acc..848e142e59ce9 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -27,7 +27,7 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
 // This assertion is in this crate, rather than in `rustc_lexer`, because that
 // crate cannot depend on `rustc_data_structures`.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(rustc_lexer::Token, 72);
+rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
 
 #[derive(Clone, Debug)]
 pub struct UnmatchedBrace {
@@ -88,7 +88,7 @@ impl<'a> StringReader<'a> {
             let token = rustc_lexer::first_token(text);
 
             let start = self.pos;
-            self.pos = self.pos + BytePos::from_usize(token.len);
+            self.pos = self.pos + BytePos(token.len);
 
             debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
 
@@ -240,7 +240,7 @@ impl<'a> StringReader<'a> {
                 token::Ident(sym, false)
             }
             rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
-                let suffix_start = start + BytePos(suffix_start as u32);
+                let suffix_start = start + BytePos(suffix_start);
                 let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
                 let suffix = if suffix_start < self.pos {
                     let string = self.str_from(suffix_start);
@@ -405,15 +405,21 @@ impl<'a> StringReader<'a> {
                 }
                 (token::ByteStr, Mode::ByteStr, 2, 1) // b" "
             }
-            rustc_lexer::LiteralKind::RawStr { n_hashes, err } => {
-                self.report_raw_str_error(start, err);
-                let n = u32::from(n_hashes);
-                (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "##
+            rustc_lexer::LiteralKind::RawStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "##
+                } else {
+                    self.report_raw_str_error(start, 1);
+                }
             }
-            rustc_lexer::LiteralKind::RawByteStr { n_hashes, err } => {
-                self.report_raw_str_error(start, err);
-                let n = u32::from(n_hashes);
-                (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "##
+            rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "##
+                } else {
+                    self.report_raw_str_error(start, 2);
+                }
             }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 return if empty_int {
@@ -484,17 +490,17 @@ impl<'a> StringReader<'a> {
         &self.src[self.src_index(start)..self.src_index(end)]
     }
 
-    fn report_raw_str_error(&self, start: BytePos, opt_err: Option<RawStrError>) {
-        match opt_err {
-            Some(RawStrError::InvalidStarter { bad_char }) => {
+    fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! {
+        match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) {
+            Err(RawStrError::InvalidStarter { bad_char }) => {
                 self.report_non_started_raw_string(start, bad_char)
             }
-            Some(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
+            Err(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
                 .report_unterminated_raw_string(start, expected, possible_terminator_offset, found),
-            Some(RawStrError::TooManyDelimiters { found }) => {
+            Err(RawStrError::TooManyDelimiters { found }) => {
                 self.report_too_many_hashes(start, found)
             }
-            None => (),
+            Ok(()) => panic!("no error found for supposedly invalid raw string literal"),
         }
     }
 
@@ -511,9 +517,9 @@ impl<'a> StringReader<'a> {
     fn report_unterminated_raw_string(
         &self,
         start: BytePos,
-        n_hashes: usize,
-        possible_offset: Option<usize>,
-        found_terminators: usize,
+        n_hashes: u32,
+        possible_offset: Option<u32>,
+        found_terminators: u32,
     ) -> ! {
         let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code(
             self.mk_sp(start, start),
@@ -526,7 +532,7 @@ impl<'a> StringReader<'a> {
         if n_hashes > 0 {
             err.note(&format!(
                 "this raw string should be terminated with `\"{}`",
-                "#".repeat(n_hashes)
+                "#".repeat(n_hashes as usize)
             ));
         }
 
@@ -537,7 +543,7 @@ impl<'a> StringReader<'a> {
             err.span_suggestion(
                 span,
                 "consider terminating the string here",
-                "#".repeat(n_hashes),
+                "#".repeat(n_hashes as usize),
                 Applicability::MaybeIncorrect,
             );
         }
 
@@ -638,7 +644,7 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! {
+    fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
         self.fatal_span_(
             start,
             self.pos,
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index d2ef89078bf6d..05547ea1515c3 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -213,7 +213,7 @@ impl<'a> Iterator for TokenIter<'a> {
             return None;
         }
         let token = rustc_lexer::first_token(self.src);
-        let (text, rest) = self.src.split_at(token.len);
+        let (text, rest) = self.src.split_at(token.len as usize);
         self.src = rest;
         Some((token.kind, text))
     }
diff --git a/src/tools/clippy/clippy_lints/src/matches/mod.rs b/src/tools/clippy/clippy_lints/src/matches/mod.rs
index b638f27160282..e9e13aece18f6 100644
--- a/src/tools/clippy/clippy_lints/src/matches/mod.rs
+++ b/src/tools/clippy/clippy_lints/src/matches/mod.rs
@@ -1112,7 +1112,7 @@ fn span_contains_cfg(cx: &LateContext<'_>, s: Span) -> bool {
     let mut pos = 0usize;
     let mut iter = tokenize(&snip).map(|t| {
         let start = pos;
-        pos += t.len;
+        pos += t.len as usize;
         (t.kind, start..pos)
     });
 
diff --git a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
index 04f16fd2161c5..d2e675a783eaa 100644
--- a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
+++ b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
@@ -345,7 +345,7 @@ fn text_has_safety_comment(src: &str, line_starts: &[BytePos], offset: usize) ->
     if line.starts_with("/*") {
         let src = src[line_start..line_starts.last().unwrap().to_usize() - offset].trim_start();
         let mut tokens = tokenize(src);
-        return src[..tokens.next().unwrap().len]
+        return src[..tokens.next().unwrap().len as usize]
             .to_ascii_uppercase()
             .contains("SAFETY:")
             && tokens.all(|t| t.kind == TokenKind::Whitespace);
diff --git a/src/tools/clippy/clippy_utils/src/hir_utils.rs b/src/tools/clippy/clippy_utils/src/hir_utils.rs
index eaf260ddfb832..1834e2a2de872 100644
--- a/src/tools/clippy/clippy_utils/src/hir_utils.rs
+++ b/src/tools/clippy/clippy_utils/src/hir_utils.rs
@@ -141,7 +141,7 @@ impl HirEqInterExpr<'_, '_, '_> {
         let mut left_pos = 0;
         let left = tokenize(&left)
             .map(|t| {
-                let end = left_pos + t.len;
+                let end = left_pos + t.len as usize;
                 let s = &left[left_pos..end];
                 left_pos = end;
                 (t, s)
@@ -156,7 +156,7 @@ impl HirEqInterExpr<'_, '_, '_> {
         let mut right_pos = 0;
         let right = tokenize(&right)
             .map(|t| {
-                let end = right_pos + t.len;
+                let end = right_pos + t.len as usize;
                 let s = &right[right_pos..end];
                 right_pos = end;
                 (t, s)
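A closing sketch, not part of the patch: with `Token::len` now a `u32`, the recurring downstream pattern, visible in the rustdoc and clippy hunks above, is a single `as usize` conversion at each slicing site. The names `token_texts` and `source` are illustrative:

    use rustc_lexer::tokenize;

    fn token_texts(source: &str) -> Vec<&str> {
        let mut pos = 0usize;
        tokenize(source)
            .map(|t| {
                let end = pos + t.len as usize; // convert u32 -> usize once, at the use site
                let text = &source[pos..end];
                pos = end;
                text
            })
            .collect()
    }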