From 59583ac554229ec3eea64f3a1a9e1a4c5d4badba Mon Sep 17 00:00:00 2001
From: Julian Wollersberger <julian.wollersberger@gmx.at>
Date: Sat, 13 Feb 2021 19:33:26 +0100
Subject: [PATCH 1/6] Move lexing of number and string literals into a separate
 file. Also make them freestanding functions instead of methods.

---
 compiler/rustc_lexer/src/lib.rs      | 336 ++-------------------------
 compiler/rustc_lexer/src/literals.rs | 309 ++++++++++++++++++++++++
 2 files changed, 327 insertions(+), 318 deletions(-)
 create mode 100644 compiler/rustc_lexer/src/literals.rs
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 44fc4db7dc199..366e0771320a8 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -22,15 +22,18 @@
 // `#![feature]` attributes should be added.
 
 mod cursor;
+mod literals;
 pub mod unescape;
 
 #[cfg(test)]
 mod tests;
 
-use self::LiteralKind::*;
 use self::TokenKind::*;
-use crate::cursor::{Cursor, EOF_CHAR};
-use std::convert::TryFrom;
+use crate::cursor::Cursor;
+use crate::literals::{
+    double_quoted_string, lifetime_or_char, number, raw_double_quoted_string, single_quoted_string,
+    LiteralKind,
+};
 
 /// Parsed token.
 /// It doesn't contain information about data that has been parsed,
@@ -137,55 +140,6 @@ pub enum DocStyle {
     Inner,
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub enum LiteralKind {
-    /// "12_u8", "0o100", "0b120i99"
-    Int { base: Base, empty_int: bool },
-    /// "12.34f32", "0b100.100"
-    Float { base: Base, empty_exponent: bool },
-    /// "'a'", "'\\'", "'''", "';"
-    Char { terminated: bool },
-    /// "b'a'", "b'\\'", "b'''", "b';"
-    Byte { terminated: bool },
-    /// ""abc"", ""abc"
-    Str { terminated: bool },
-    /// "b"abc"", "b"abc"
-    ByteStr { terminated: bool },
-    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
-    RawStr { n_hashes: u16, err: Option<RawStrError> },
-    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
-    RawByteStr { n_hashes: u16, err: Option<RawStrError> },
-}
-
-/// Error produced validating a raw string. Represents cases like:
-/// - `r##~"abcde"##`: `InvalidStarter`
-/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
-/// - Too many `#`s (>65535): `TooManyDelimiters`
-// perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub enum RawStrError {
-    /// Non `#` characters exist between `r` and `"` eg. `r#~"..`
-    InvalidStarter { bad_char: char },
-    /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
-    /// may have intended to terminate it.
-    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
-    /// More than 65535 `#`s exist.
-    TooManyDelimiters { found: usize },
-}
-
-/// Base of numeric literal encoding according to its prefix.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub enum Base {
-    /// Literal starts with "0b".
-    Binary,
-    /// Literal starts with "0o".
-    Octal,
-    /// Literal starts with "0x".
-    Hexadecimal,
-    /// Literal doesn't contain a prefix.
-    Decimal,
-}
-
 /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
 /// but shebang isn't a part of rust syntax.
 pub fn strip_shebang(input: &str) -> Option<usize> {
@@ -315,12 +269,12 @@ impl Cursor<'_> {
             'r' => match (self.first(), self.second()) {
                 ('#', c1) if is_id_start(c1) => self.raw_ident(),
                 ('#', _) | ('"', _) => {
-                    let (n_hashes, err) = self.raw_double_quoted_string(1);
+                    let (n_hashes, err) = raw_double_quoted_string(self, 1);
                     let suffix_start = self.len_consumed();
                     if err.is_none() {
                         self.eat_literal_suffix();
                     }
-                    let kind = RawStr { n_hashes, err };
+                    let kind = LiteralKind::RawStr { n_hashes, err };
                     Literal { kind, suffix_start }
                 }
                 _ => self.ident(),
@@ -330,32 +284,32 @@ impl Cursor<'_> {
             'b' => match (self.first(), self.second()) {
                 ('\'', _) => {
                     self.bump();
-                    let terminated = self.single_quoted_string();
+                    let terminated = single_quoted_string(self);
                     let suffix_start = self.len_consumed();
                     if terminated {
                         self.eat_literal_suffix();
                     }
-                    let kind = Byte { terminated };
+                    let kind = LiteralKind::Byte { terminated };
                     Literal { kind, suffix_start }
                 }
                 ('"', _) => {
                     self.bump();
-                    let terminated = self.double_quoted_string();
+                    let terminated = double_quoted_string(self);
                     let suffix_start = self.len_consumed();
                     if terminated {
                         self.eat_literal_suffix();
                     }
-                    let kind = ByteStr { terminated };
+                    let kind = LiteralKind::ByteStr { terminated };
                     Literal { kind, suffix_start }
                 }
                 ('r', '"') | ('r', '#') => {
                     self.bump();
-                    let (n_hashes, err) = self.raw_double_quoted_string(2);
+                    let (n_hashes, err) = raw_double_quoted_string(self, 2);
                     let suffix_start = self.len_consumed();
                     if err.is_none() {
                         self.eat_literal_suffix();
                     }
-                    let kind = RawByteStr { n_hashes, err };
+                    let kind = LiteralKind::RawByteStr { n_hashes, err };
                     Literal { kind, suffix_start }
                 }
                 _ => self.ident(),
@@ -367,7 +321,7 @@ impl Cursor<'_> {
 
             // Numeric literal.
             c @ '0'..='9' => {
-                let literal_kind = self.number(c);
+                let literal_kind = number(self, c);
                 let suffix_start = self.len_consumed();
                 self.eat_literal_suffix();
                 TokenKind::Literal { kind: literal_kind, suffix_start }
@@ -402,16 +356,16 @@ impl Cursor<'_> {
             '%' => Percent,
 
             // Lifetime or character literal.
-            '\'' => self.lifetime_or_char(),
+            '\'' => lifetime_or_char(self),
 
             // String literal.
             '"' => {
-                let terminated = self.double_quoted_string();
+                let terminated = double_quoted_string(self);
                 let suffix_start = self.len_consumed();
                 if terminated {
                     self.eat_literal_suffix();
                 }
-                let kind = Str { terminated };
+                let kind = LiteralKind::Str { terminated };
                 Literal { kind, suffix_start }
             }
             _ => Unknown,
@@ -494,260 +448,6 @@ impl Cursor<'_> {
         Ident
     }
 
-    fn number(&mut self, first_digit: char) -> LiteralKind {
-        debug_assert!('0' <= self.prev() && self.prev() <= '9');
-        let mut base = Base::Decimal;
-        if first_digit == '0' {
-            // Attempt to parse encoding base.
-            let has_digits = match self.first() {
-                'b' => {
-                    base = Base::Binary;
-                    self.bump();
-                    self.eat_decimal_digits()
-                }
-                'o' => {
-                    base = Base::Octal;
-                    self.bump();
-                    self.eat_decimal_digits()
-                }
-                'x' => {
-                    base = Base::Hexadecimal;
-                    self.bump();
-                    self.eat_hexadecimal_digits()
-                }
-                // Not a base prefix.
-                '0'..='9' | '_' | '.' | 'e' | 'E' => {
-                    self.eat_decimal_digits();
-                    true
-                }
-                // Just a 0.
-                _ => return Int { base, empty_int: false },
-            };
-            // Base prefix was provided, but there were no digits
-            // after it, e.g. "0x".
-            if !has_digits {
-                return Int { base, empty_int: true };
-            }
-        } else {
-            // No base prefix, parse number in the usual way.
-            self.eat_decimal_digits();
-        };
-
-        match self.first() {
-            // Don't be greedy if this is actually an
-            // integer literal followed by field/method access or a range pattern
-            // (`0..2` and `12.foo()`)
-            '.' if self.second() != '.' && !is_id_start(self.second()) => {
-                // might have stuff after the ., and if it does, it needs to start
-                // with a number
-                self.bump();
-                let mut empty_exponent = false;
-                if self.first().is_digit(10) {
-                    self.eat_decimal_digits();
-                    match self.first() {
-                        'e' | 'E' => {
-                            self.bump();
-                            empty_exponent = !self.eat_float_exponent();
-                        }
-                        _ => (),
-                    }
-                }
-                Float { base, empty_exponent }
-            }
-            'e' | 'E' => {
-                self.bump();
-                let empty_exponent = !self.eat_float_exponent();
-                Float { base, empty_exponent }
-            }
-            _ => Int { base, empty_int: false },
-        }
-    }
-
-    fn lifetime_or_char(&mut self) -> TokenKind {
-        debug_assert!(self.prev() == '\'');
-
-        let can_be_a_lifetime = if self.second() == '\'' {
-            // It's surely not a lifetime.
-            false
-        } else {
-            // If the first symbol is valid for identifier, it can be a lifetime.
-            // Also check if it's a number for a better error reporting (so '0 will
-            // be reported as invalid lifetime and not as unterminated char literal).
-            is_id_start(self.first()) || self.first().is_digit(10)
-        };
-
-        if !can_be_a_lifetime {
-            let terminated = self.single_quoted_string();
-            let suffix_start = self.len_consumed();
-            if terminated {
-                self.eat_literal_suffix();
-            }
-            let kind = Char { terminated };
-            return Literal { kind, suffix_start };
-        }
-
-        // Either a lifetime or a character literal with
-        // length greater than 1.
-
-        let starts_with_number = self.first().is_digit(10);
-
-        // Skip the literal contents.
-        // First symbol can be a number (which isn't a valid identifier start),
-        // so skip it without any checks.
-        self.bump();
-        self.eat_while(is_id_continue);
-
-        // Check if after skipping literal contents we've met a closing
-        // single quote (which means that user attempted to create a
-        // string with single quotes).
-        if self.first() == '\'' {
-            self.bump();
-            let kind = Char { terminated: true };
-            Literal { kind, suffix_start: self.len_consumed() }
-        } else {
-            Lifetime { starts_with_number }
-        }
-    }
-
-    fn single_quoted_string(&mut self) -> bool {
-        debug_assert!(self.prev() == '\'');
-        // Check if it's a one-symbol literal.
-        if self.second() == '\'' && self.first() != '\\' {
-            self.bump();
-            self.bump();
-            return true;
-        }
-
-        // Literal has more than one symbol.
-
-        // Parse until either quotes are terminated or error is detected.
-        loop {
-            match self.first() {
-                // Quotes are terminated, finish parsing.
-                '\'' => {
-                    self.bump();
-                    return true;
-                }
-                // Probably beginning of the comment, which we don't want to include
-                // to the error report.
-                '/' => break,
-                // Newline without following '\'' means unclosed quote, stop parsing.
-                '\n' if self.second() != '\'' => break,
-                // End of file, stop parsing.
-                EOF_CHAR if self.is_eof() => break,
-                // Escaped slash is considered one character, so bump twice.
-                '\\' => {
-                    self.bump();
-                    self.bump();
-                }
-                // Skip the character.
-                _ => {
-                    self.bump();
-                }
-            }
-        }
-        // String was not terminated.
-        false
-    }
-
-    /// Eats double-quoted string and returns true
-    /// if string is terminated.
-    fn double_quoted_string(&mut self) -> bool {
-        debug_assert!(self.prev() == '"');
-        while let Some(c) = self.bump() {
-            match c {
-                '"' => {
-                    return true;
-                }
-                '\\' if self.first() == '\\' || self.first() == '"' => {
-                    // Bump again to skip escaped character.
-                    self.bump();
-                }
-                _ => (),
-            }
-        }
-        // End of file reached.
-        false
-    }
-
-    /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
-    fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u16, Option<RawStrError>) {
-        // Wrap the actual function to handle the error with too many hashes.
-        // This way, it eats the whole raw string.
-        let (n_hashes, err) = self.raw_string_unvalidated(prefix_len);
-        // Only up to 65535 `#`s are allowed in raw strings
-        match u16::try_from(n_hashes) {
-            Ok(num) => (num, err),
-            // We lie about the number of hashes here :P
-            Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
-        }
-    }
-
-    fn raw_string_unvalidated(&mut self, prefix_len: usize) -> (usize, Option<RawStrError>) {
-        debug_assert!(self.prev() == 'r');
-        let start_pos = self.len_consumed();
-        let mut possible_terminator_offset = None;
-        let mut max_hashes = 0;
-
-        // Count opening '#' symbols.
-        let mut eaten = 0;
-        while self.first() == '#' {
-            eaten += 1;
-            self.bump();
-        }
-        let n_start_hashes = eaten;
-
-        // Check that string is started.
-        match self.bump() {
-            Some('"') => (),
-            c => {
-                let c = c.unwrap_or(EOF_CHAR);
-                return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
-            }
-        }
-
-        // Skip the string contents and on each '#' character met, check if this is
-        // a raw string termination.
-        loop {
-            self.eat_while(|c| c != '"');
-
-            if self.is_eof() {
-                return (
-                    n_start_hashes,
-                    Some(RawStrError::NoTerminator {
-                        expected: n_start_hashes,
-                        found: max_hashes,
-                        possible_terminator_offset,
-                    }),
-                );
-            }
-
-            // Eat closing double quote.
-            self.bump();
-
-            // Check that amount of closing '#' symbols
-            // is equal to the amount of opening ones.
-            // Note that this will not consume extra trailing `#` characters:
-            // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
-            // followed by a `#` token.
-            let mut n_end_hashes = 0;
-            while self.first() == '#' && n_end_hashes < n_start_hashes {
-                n_end_hashes += 1;
-                self.bump();
-            }
-
-            if n_end_hashes == n_start_hashes {
-                return (n_start_hashes, None);
-            } else if n_end_hashes > max_hashes {
-                // Keep track of possible terminators to give a hint about
-                // where there might be a missing terminator
-                possible_terminator_offset =
-                    Some(self.len_consumed() - start_pos - n_end_hashes + prefix_len);
-                max_hashes = n_end_hashes;
-            }
-        }
-    }
-
     fn eat_decimal_digits(&mut self) -> bool {
         let mut has_digits = false;
         loop {
diff --git a/compiler/rustc_lexer/src/literals.rs b/compiler/rustc_lexer/src/literals.rs
new file mode 100644
index 0000000000000..2bfa5f9228b84
--- /dev/null
+++ b/compiler/rustc_lexer/src/literals.rs
@@ -0,0 +1,309 @@
+use crate::cursor::{Cursor, EOF_CHAR};
+use crate::{is_id_continue, is_id_start, TokenKind};
+use std::convert::TryFrom;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum LiteralKind {
+    /// "12_u8", "0o100", "0b120i99"
+    Int { base: Base, empty_int: bool },
+    /// "12.34f32", "0b100.100"
+    Float { base: Base, empty_exponent: bool },
+    /// "'a'", "'\\'", "'''", "';"
+    Char { terminated: bool },
+    /// "b'a'", "b'\\'", "b'''", "b';"
+    Byte { terminated: bool },
+    /// ""abc"", ""abc"
+    Str { terminated: bool },
+    /// "b"abc"", "b"abc"
+    ByteStr { terminated: bool },
+    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
+    RawStr { n_hashes: u16, err: Option<RawStrError> },
+    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
+    RawByteStr { n_hashes: u16, err: Option<RawStrError> },
+}
+
+/// Base of numeric literal encoding according to its prefix.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Base {
+    /// Literal starts with "0b".
+    Binary,
+    /// Literal starts with "0o".
+    Octal,
+    /// Literal starts with "0x".
+    Hexadecimal,
+    /// Literal doesn't contain a prefix.
+    Decimal,
+}
+
+/// Error produced validating a raw string. Represents cases like:
+/// - `r##~"abcde"##`: `InvalidStarter`
+/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11) }`
+/// - Too many `#`s (>65535): `TooManyDelimiters`
+// perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum RawStrError {
+    /// Non `#` characters exist between `r` and `"` eg. `r#~"..`
+    InvalidStarter { bad_char: char },
+    /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
+    /// may have intended to terminate it.
+    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
+    /// More than 65535 `#`s exist.
+    TooManyDelimiters { found: usize },
+}
+
+pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
+    debug_assert!('0' <= cursor.prev() && cursor.prev() <= '9');
+    let mut base = Base::Decimal;
+    if first_digit == '0' {
+        // Attempt to parse encoding base.
+        let has_digits = match cursor.first() {
+            'b' => {
+                base = Base::Binary;
+                cursor.bump();
+                cursor.eat_decimal_digits()
+            }
+            'o' => {
+                base = Base::Octal;
+                cursor.bump();
+                cursor.eat_decimal_digits()
+            }
+            'x' => {
+                base = Base::Hexadecimal;
+                cursor.bump();
+                cursor.eat_hexadecimal_digits()
+            }
+            // Not a base prefix.
+            '0'..='9' | '_' | '.' | 'e' | 'E' => {
+                cursor.eat_decimal_digits();
+                true
+            }
+            // Just a 0.
+            _ => return LiteralKind::Int { base, empty_int: false },
+        };
+        // Base prefix was provided, but there were no digits
+        // after it, e.g. "0x".
+        if !has_digits {
+            return LiteralKind::Int { base, empty_int: true };
+        }
+    } else {
+        // No base prefix, parse number in the usual way.
+        cursor.eat_decimal_digits();
+    };
+
+    match cursor.first() {
+        // Don't be greedy if this is actually an
+        // integer literal followed by field/method access or a range pattern
+        // (`0..2` and `12.foo()`)
+        '.' if cursor.second() != '.' && !is_id_start(cursor.second()) => {
+            // might have stuff after the ., and if it does, it needs to start
+            // with a number
+            cursor.bump();
+            let mut empty_exponent = false;
+            if cursor.first().is_digit(10) {
+                cursor.eat_decimal_digits();
+                match cursor.first() {
+                    'e' | 'E' => {
+                        cursor.bump();
+                        empty_exponent = !cursor.eat_float_exponent();
+                    }
+                    _ => (),
+                }
+            }
+            LiteralKind::Float { base, empty_exponent }
+        }
+        'e' | 'E' => {
+            cursor.bump();
+            let empty_exponent = !cursor.eat_float_exponent();
+            LiteralKind::Float { base, empty_exponent }
+        }
+        _ => LiteralKind::Int { base, empty_int: false },
+    }
+}
+
+pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
+    debug_assert!(cursor.prev() == '\'');
+
+    let can_be_a_lifetime = if cursor.second() == '\'' {
+        // It's surely not a lifetime.
+        false
+    } else {
+        // If the first symbol is valid for identifier, it can be a lifetime.
+        // Also check if it's a digit for better error reporting (so `'0` will
+        // be reported as an invalid lifetime and not as an unterminated char literal).
+        is_id_start(cursor.first()) || cursor.first().is_digit(10)
+    };
+
+    if !can_be_a_lifetime {
+        let terminated = single_quoted_string(cursor);
+        let suffix_start = cursor.len_consumed();
+        if terminated {
+            cursor.eat_literal_suffix();
+        }
+        let kind = LiteralKind::Char { terminated };
+        return TokenKind::Literal { kind, suffix_start };
+    }
+
+    // Either a lifetime or a character literal with
+    // length greater than 1.
+
+    let starts_with_number = cursor.first().is_digit(10);
+
+    // Skip the literal contents.
+    // First symbol can be a number (which isn't a valid identifier start),
+    // so skip it without any checks.
+    cursor.bump();
+    cursor.eat_while(is_id_continue);
+
+    // Check if after skipping literal contents we've met a closing
+    // single quote (which means that user attempted to create a
+    // string with single quotes).
+    if cursor.first() == '\'' {
+        cursor.bump();
+        let kind = LiteralKind::Char { terminated: true };
+        TokenKind::Literal { kind, suffix_start: cursor.len_consumed() }
+    } else {
+        TokenKind::Lifetime { starts_with_number }
+    }
+}
+
+pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
+    debug_assert!(cursor.prev() == '\'');
+    // Check if it's a one-symbol literal.
+    if cursor.second() == '\'' && cursor.first() != '\\' {
+        cursor.bump();
+        cursor.bump();
+        return true;
+    }
+
+    // Literal has more than one symbol.
+
+    // Parse until either quotes are terminated or error is detected.
+    loop {
+        match cursor.first() {
+            // Quotes are terminated, finish parsing.
+            '\'' => {
+                cursor.bump();
+                return true;
+            }
+            // Probably the beginning of a comment, which we don't want to include
+            // in the error report.
+            '/' => break,
+            // Newline without following '\'' means unclosed quote, stop parsing.
+            '\n' if cursor.second() != '\'' => break,
+            // End of file, stop parsing.
+            EOF_CHAR if cursor.is_eof() => break,
+            // A backslash and the character it escapes count as one symbol, so bump twice.
+            '\\' => {
+                cursor.bump();
+                cursor.bump();
+            }
+            // Skip the character.
+            _ => {
+                cursor.bump();
+            }
+        }
+    }
+    // String was not terminated.
+    false
+}
+
+/// Eats double-quoted string and returns true
+/// if string is terminated.
+pub(crate) fn double_quoted_string(cursor: &mut Cursor) -> bool {
+    debug_assert!(cursor.prev() == '"');
+    while let Some(c) = cursor.bump() {
+        match c {
+            '"' => {
+                return true;
+            }
+            '\\' if cursor.first() == '\\' || cursor.first() == '"' => {
+                // Bump again to skip escaped character.
+                cursor.bump();
+            }
+            _ => (),
+        }
+    }
+    // End of file reached.
+    false
+}
+
+/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
+pub(crate) fn raw_double_quoted_string(
+    cursor: &mut Cursor,
+    prefix_len: usize,
+) -> (u16, Option<RawStrError>) {
+    // Wrap the actual function to handle the error with too many hashes.
+    // This way, it eats the whole raw string.
+    let (n_hashes, err) = raw_string_unvalidated(cursor, prefix_len);
+    // Only up to 65535 `#`s are allowed in raw strings
+    match u16::try_from(n_hashes) {
+        Ok(num) => (num, err),
+        // We lie about the number of hashes here :P
+        Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
+    }
+}
+
+fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Option<RawStrError>) {
+    debug_assert!(cursor.prev() == 'r');
+    let start_pos = cursor.len_consumed();
+    let mut possible_terminator_offset = None;
+    let mut max_hashes = 0;
+
+    // Count opening '#' symbols.
+    let mut eaten = 0;
+    while cursor.first() == '#' {
+        eaten += 1;
+        cursor.bump();
+    }
+    let n_start_hashes = eaten;
+
+    // Check that string is started.
+    match cursor.bump() {
+        Some('"') => (),
+        c => {
+            let c = c.unwrap_or(EOF_CHAR);
+            return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
+        }
+    }
+
+    // Skip the string contents and on each '"' character met, check if it is
+    // followed by enough '#' symbols to terminate the raw string.
+    loop {
+        cursor.eat_while(|c| c != '"');
+
+        if cursor.is_eof() {
+            return (
+                n_start_hashes,
+                Some(RawStrError::NoTerminator {
+                    expected: n_start_hashes,
+                    found: max_hashes,
+                    possible_terminator_offset,
+                }),
+            );
+        }
+
+        // Eat closing double quote.
+        cursor.bump();
+
+        // Check that amount of closing '#' symbols
+        // is equal to the amount of opening ones.
+        // Note that this will not consume extra trailing `#` characters:
+        // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
+        // followed by a `#` token.
+        let mut n_end_hashes = 0;
+        while cursor.first() == '#' && n_end_hashes < n_start_hashes {
+            n_end_hashes += 1;
+            cursor.bump();
+        }
+
+        if n_end_hashes == n_start_hashes {
+            return (n_start_hashes, None);
+        } else if n_end_hashes > max_hashes {
+            // Keep track of possible terminators to give a hint about
+            // where there might be a missing terminator
+            possible_terminator_offset =
+                Some(cursor.len_consumed() - start_pos - n_end_hashes + prefix_len);
+            max_hashes = n_end_hashes;
+        }
+    }
+}

From a534bd71a32abe837f68496d051ada75022a291b Mon Sep 17 00:00:00 2001
From: Julian Wollersberger <julian.wollersberger@gmx.at>
Date: Wed, 3 Mar 2021 22:34:57 +0100
Subject: [PATCH 2/6] Move the `eat_*_digits()` methods to `literals.rs`.

---
 compiler/rustc_lexer/src/cursor.rs   |  7 +++
 compiler/rustc_lexer/src/lib.rs      | 74 ++++------------------------
 compiler/rustc_lexer/src/literals.rs | 67 +++++++++++++++++++++----
 3 files changed, 74 insertions(+), 74 deletions(-)

diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index 297f3d19ca178..e188102c8e580 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -81,4 +81,11 @@ impl<'a> Cursor<'a> {
 
         Some(c)
     }
+
+    /// Eats symbols while predicate returns true or until the end of file is reached.
+    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
+        while predicate(self.first()) && !self.is_eof() {
+            self.bump();
+        }
+    }
 }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 366e0771320a8..9855cf13092fb 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -31,8 +31,8 @@ mod tests;
 use self::TokenKind::*;
 use crate::cursor::Cursor;
 use crate::literals::{
-    double_quoted_string, lifetime_or_char, number, raw_double_quoted_string, single_quoted_string,
-    LiteralKind,
+    double_quoted_string, eat_literal_suffix, lifetime_or_char, number, raw_double_quoted_string,
+    single_quoted_string, LiteralKind,
 };
 
 /// Parsed token.
@@ -272,7 +272,7 @@ impl Cursor<'_> {
                     let (n_hashes, err) = raw_double_quoted_string(self, 1);
                     let suffix_start = self.len_consumed();
                     if err.is_none() {
-                        self.eat_literal_suffix();
+                        eat_literal_suffix(self);
                     }
                     let kind = LiteralKind::RawStr { n_hashes, err };
                     Literal { kind, suffix_start }
@@ -287,7 +287,7 @@ impl Cursor<'_> {
                     let terminated = single_quoted_string(self);
                     let suffix_start = self.len_consumed();
                     if terminated {
-                        self.eat_literal_suffix();
+                        eat_literal_suffix(self);
                     }
                     let kind = LiteralKind::Byte { terminated };
                     Literal { kind, suffix_start }
@@ -297,7 +297,7 @@ impl Cursor<'_> {
                     let terminated = double_quoted_string(self);
                     let suffix_start = self.len_consumed();
                     if terminated {
-                        self.eat_literal_suffix();
+                        eat_literal_suffix(self);
                     }
                     let kind = LiteralKind::ByteStr { terminated };
                     Literal { kind, suffix_start }
@@ -307,7 +307,7 @@ impl Cursor<'_> {
                     let (n_hashes, err) = raw_double_quoted_string(self, 2);
                     let suffix_start = self.len_consumed();
                     if err.is_none() {
-                        self.eat_literal_suffix();
+                        eat_literal_suffix(self);
                     }
                     let kind = LiteralKind::RawByteStr { n_hashes, err };
                     Literal { kind, suffix_start }
@@ -323,7 +323,7 @@ impl Cursor<'_> {
             c @ '0'..='9' => {
                 let literal_kind = number(self, c);
                 let suffix_start = self.len_consumed();
-                self.eat_literal_suffix();
+                eat_literal_suffix(self);
                 TokenKind::Literal { kind: literal_kind, suffix_start }
             }
 
@@ -363,7 +363,7 @@ impl Cursor<'_> {
                 let terminated = double_quoted_string(self);
                 let suffix_start = self.len_consumed();
                 if terminated {
-                    self.eat_literal_suffix();
+                    eat_literal_suffix(self);
                 }
                 let kind = LiteralKind::Str { terminated };
                 Literal { kind, suffix_start }
@@ -448,56 +448,7 @@ impl Cursor<'_> {
         Ident
     }
 
-    fn eat_decimal_digits(&mut self) -> bool {
-        let mut has_digits = false;
-        loop {
-            match self.first() {
-                '_' => {
-                    self.bump();
-                }
-                '0'..='9' => {
-                    has_digits = true;
-                    self.bump();
-                }
-                _ => break,
-            }
-        }
-        has_digits
-    }
-
-    fn eat_hexadecimal_digits(&mut self) -> bool {
-        let mut has_digits = false;
-        loop {
-            match self.first() {
-                '_' => {
-                    self.bump();
-                }
-                '0'..='9' | 'a'..='f' | 'A'..='F' => {
-                    has_digits = true;
-                    self.bump();
-                }
-                _ => break,
-            }
-        }
-        has_digits
-    }
-
-    /// Eats the float exponent. Returns true if at least one digit was met,
-    /// and returns false otherwise.
-    fn eat_float_exponent(&mut self) -> bool {
-        debug_assert!(self.prev() == 'e' || self.prev() == 'E');
-        if self.first() == '-' || self.first() == '+' {
-            self.bump();
-        }
-        self.eat_decimal_digits()
-    }
-
-    // Eats the suffix of the literal, e.g. "_u8".
-    fn eat_literal_suffix(&mut self) {
-        self.eat_identifier();
-    }
-
-    // Eats the identifier.
+    /// Eats one identifier.
     fn eat_identifier(&mut self) {
         if !is_id_start(self.first()) {
             return;
@@ -506,11 +457,4 @@ impl Cursor<'_> {
 
         self.eat_while(is_id_continue);
     }
-
-    /// Eats symbols while predicate returns true or until the end of file is reached.
-    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
-        while predicate(self.first()) && !self.is_eof() {
-            self.bump();
-        }
-    }
 }
diff --git a/compiler/rustc_lexer/src/literals.rs b/compiler/rustc_lexer/src/literals.rs
index 2bfa5f9228b84..401b4a5df653d 100644
--- a/compiler/rustc_lexer/src/literals.rs
+++ b/compiler/rustc_lexer/src/literals.rs
@@ -60,21 +60,21 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
             'b' => {
                 base = Base::Binary;
                 cursor.bump();
-                cursor.eat_decimal_digits()
+                eat_decimal_digits(cursor)
             }
             'o' => {
                 base = Base::Octal;
                 cursor.bump();
-                cursor.eat_decimal_digits()
+                eat_decimal_digits(cursor)
             }
             'x' => {
                 base = Base::Hexadecimal;
                 cursor.bump();
-                cursor.eat_hexadecimal_digits()
+                eat_hexadecimal_digits(cursor)
             }
             // Not a base prefix.
             '0'..='9' | '_' | '.' | 'e' | 'E' => {
-                cursor.eat_decimal_digits();
+                eat_decimal_digits(cursor);
                 true
             }
             // Just a 0.
@@ -87,7 +87,7 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
         }
     } else {
         // No base prefix, parse number in the usual way.
-        cursor.eat_decimal_digits();
+        eat_decimal_digits(cursor);
     };
 
     match cursor.first() {
@@ -100,11 +100,11 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
             cursor.bump();
             let mut empty_exponent = false;
             if cursor.first().is_digit(10) {
-                cursor.eat_decimal_digits();
+                eat_decimal_digits(cursor);
                 match cursor.first() {
                     'e' | 'E' => {
                         cursor.bump();
-                        empty_exponent = !cursor.eat_float_exponent();
+                        empty_exponent = !eat_float_exponent(cursor);
                     }
                     _ => (),
                 }
@@ -113,13 +113,57 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
         }
         'e' | 'E' => {
             cursor.bump();
-            let empty_exponent = !cursor.eat_float_exponent();
+            let empty_exponent = !eat_float_exponent(cursor);
             LiteralKind::Float { base, empty_exponent }
         }
         _ => LiteralKind::Int { base, empty_int: false },
     }
 }
 
+pub(crate) fn eat_decimal_digits(cursor: &mut Cursor) -> bool {
+    let mut has_digits = false;
+    loop {
+        match cursor.first() {
+            '_' => {
+                cursor.bump();
+            }
+            '0'..='9' => {
+                has_digits = true;
+                cursor.bump();
+            }
+            _ => break,
+        }
+    }
+    has_digits
+}
+
+pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor) -> bool {
+    let mut has_digits = false;
+    loop {
+        match cursor.first() {
+            '_' => {
+                cursor.bump();
+            }
+            '0'..='9' | 'a'..='f' | 'A'..='F' => {
+                has_digits = true;
+                cursor.bump();
+            }
+            _ => break,
+        }
+    }
+    has_digits
+}
+
+/// Eats the float exponent. Returns true if at least one digit was met,
+/// and returns false otherwise.
+fn eat_float_exponent(cursor: &mut Cursor) -> bool {
+    debug_assert!(cursor.prev() == 'e' || cursor.prev() == 'E');
+    if cursor.first() == '-' || cursor.first() == '+' {
+        cursor.bump();
+    }
+    eat_decimal_digits(cursor)
+}
+
 pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
     debug_assert!(cursor.prev() == '\'');
 
@@ -137,7 +181,7 @@ pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
         let terminated = single_quoted_string(cursor);
         let suffix_start = cursor.len_consumed();
         if terminated {
-            cursor.eat_literal_suffix();
+            eat_literal_suffix(cursor);
         }
         let kind = LiteralKind::Char { terminated };
         return TokenKind::Literal { kind, suffix_start };
@@ -307,3 +351,8 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
         }
     }
 }
+
+/// Eats the suffix of a literal, e.g. "_u8".
+pub(crate) fn eat_literal_suffix(cursor: &mut Cursor) {
+    cursor.eat_identifier();
+}

From 70a4bc89c70d1cba8d6bd2f961f3156962efab7a Mon Sep 17 00:00:00 2001
From: Julian Wollersberger <julian.wollersberger@gmx.at>
Date: Wed, 3 Mar 2021 23:06:03 +0100
Subject: [PATCH 3/6] Make `advance_token` a freestanding function. This gives
 better separation of concerns between the lexing and the Cursor's
 iterator-like functionality.

---
 compiler/rustc_lexer/src/lib.rs      | 372 +++++++++++++--------------
 compiler/rustc_lexer/src/literals.rs |   4 +-
 2 files changed, 187 insertions(+), 189 deletions(-)

diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 9855cf13092fb..a05a053324519 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -168,7 +168,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
 /// Parses the first token from the provided input string.
 pub fn first_token(input: &str) -> Token {
     debug_assert!(!input.is_empty());
-    Cursor::new(input).advance_token()
+    advance_token(&mut Cursor::new(input))
 }
 
 /// Creates an iterator that produces tokens from the input string.
@@ -250,211 +250,209 @@ pub fn is_ident(string: &str) -> bool {
     }
 }
 
-impl Cursor<'_> {
-    /// Parses a token from the input string.
-    fn advance_token(&mut self) -> Token {
-        let first_char = self.bump().unwrap();
-        let token_kind = match first_char {
-            // Slash, comment or block comment.
-            '/' => match self.first() {
-                '/' => self.line_comment(),
-                '*' => self.block_comment(),
-                _ => Slash,
-            },
-
-            // Whitespace sequence.
-            c if is_whitespace(c) => self.whitespace(),
-
-            // Raw identifier, raw string literal or identifier.
-            'r' => match (self.first(), self.second()) {
-                ('#', c1) if is_id_start(c1) => self.raw_ident(),
-                ('#', _) | ('"', _) => {
-                    let (n_hashes, err) = raw_double_quoted_string(self, 1);
-                    let suffix_start = self.len_consumed();
-                    if err.is_none() {
-                        eat_literal_suffix(self);
-                    }
-                    let kind = LiteralKind::RawStr { n_hashes, err };
-                    Literal { kind, suffix_start }
+/// Parses a token from the input string.
+fn advance_token(cursor: &mut Cursor) -> Token {
+    let first_char = cursor.bump().unwrap();
+    let token_kind = match first_char {
+        // Slash, comment or block comment.
+        '/' => match cursor.first() {
+            '/' => line_comment(cursor),
+            '*' => block_comment(cursor),
+            _ => Slash,
+        },
+
+        // Whitespace sequence.
+        c if is_whitespace(c) => whitespace(cursor),
+
+        // Raw identifier, raw string literal or identifier.
+        'r' => match (cursor.first(), cursor.second()) {
+            ('#', c1) if is_id_start(c1) => raw_ident(cursor),
+            ('#', _) | ('"', _) => {
+                let (n_hashes, err) = raw_double_quoted_string(cursor, 1);
+                let suffix_start = cursor.len_consumed();
+                if err.is_none() {
+                    eat_literal_suffix(cursor);
                 }
-                _ => self.ident(),
-            },
-
-            // Byte literal, byte string literal, raw byte string literal or identifier.
-            'b' => match (self.first(), self.second()) {
-                ('\'', _) => {
-                    self.bump();
-                    let terminated = single_quoted_string(self);
-                    let suffix_start = self.len_consumed();
-                    if terminated {
-                        eat_literal_suffix(self);
-                    }
-                    let kind = LiteralKind::Byte { terminated };
-                    Literal { kind, suffix_start }
-                }
-                ('"', _) => {
-                    self.bump();
-                    let terminated = double_quoted_string(self);
-                    let suffix_start = self.len_consumed();
-                    if terminated {
-                        eat_literal_suffix(self);
-                    }
-                    let kind = LiteralKind::ByteStr { terminated };
-                    Literal { kind, suffix_start }
-                }
-                ('r', '"') | ('r', '#') => {
-                    self.bump();
-                    let (n_hashes, err) = raw_double_quoted_string(self, 2);
-                    let suffix_start = self.len_consumed();
-                    if err.is_none() {
-                        eat_literal_suffix(self);
-                    }
-                    let kind = LiteralKind::RawByteStr { n_hashes, err };
-                    Literal { kind, suffix_start }
+                let kind = LiteralKind::RawStr { n_hashes, err };
+                Literal { kind, suffix_start }
+            }
+            _ => ident(cursor),
+        },
+
+        // Byte literal, byte string literal, raw byte string literal or identifier.
+        'b' => match (cursor.first(), cursor.second()) {
+            ('\'', _) => {
+                cursor.bump();
+                let terminated = single_quoted_string(cursor);
+                let suffix_start = cursor.len_consumed();
+                if terminated {
+                    eat_literal_suffix(cursor);
                 }
-                _ => self.ident(),
-            },
-
-            // Identifier (this should be checked after other variant that can
-            // start as identifier).
-            c if is_id_start(c) => self.ident(),
-
-            // Numeric literal.
-            c @ '0'..='9' => {
-                let literal_kind = number(self, c);
-                let suffix_start = self.len_consumed();
-                eat_literal_suffix(self);
-                TokenKind::Literal { kind: literal_kind, suffix_start }
+                let kind = LiteralKind::Byte { terminated };
+                Literal { kind, suffix_start }
             }
-
-            // One-symbol tokens.
-            ';' => Semi,
-            ',' => Comma,
-            '.' => Dot,
-            '(' => OpenParen,
-            ')' => CloseParen,
-            '{' => OpenBrace,
-            '}' => CloseBrace,
-            '[' => OpenBracket,
-            ']' => CloseBracket,
-            '@' => At,
-            '#' => Pound,
-            '~' => Tilde,
-            '?' => Question,
-            ':' => Colon,
-            '$' => Dollar,
-            '=' => Eq,
-            '!' => Bang,
-            '<' => Lt,
-            '>' => Gt,
-            '-' => Minus,
-            '&' => And,
-            '|' => Or,
-            '+' => Plus,
-            '*' => Star,
-            '^' => Caret,
-            '%' => Percent,
-
-            // Lifetime or character literal.
-            '\'' => lifetime_or_char(self),
-
-            // String literal.
-            '"' => {
-                let terminated = double_quoted_string(self);
-                let suffix_start = self.len_consumed();
+            ('"', _) => {
+                cursor.bump();
+                let terminated = double_quoted_string(cursor);
+                let suffix_start = cursor.len_consumed();
                 if terminated {
-                    eat_literal_suffix(self);
+                    eat_literal_suffix(cursor);
                 }
-                let kind = LiteralKind::Str { terminated };
+                let kind = LiteralKind::ByteStr { terminated };
                 Literal { kind, suffix_start }
             }
-            _ => Unknown,
-        };
-        Token::new(token_kind, self.len_consumed())
-    }
+            ('r', '"') | ('r', '#') => {
+                cursor.bump();
+                let (n_hashes, err) = raw_double_quoted_string(cursor, 2);
+                let suffix_start = cursor.len_consumed();
+                if err.is_none() {
+                    eat_literal_suffix(cursor);
+                }
+                let kind = LiteralKind::RawByteStr { n_hashes, err };
+                Literal { kind, suffix_start }
+            }
+            _ => ident(cursor),
+        },
+
+        // Identifier (this should be checked after other variant that can
+        // start as identifier).
+        c if is_id_start(c) => ident(cursor),
+
+        // Numeric literal.
+        c @ '0'..='9' => {
+            let literal_kind = number(cursor, c);
+            let suffix_start = cursor.len_consumed();
+            eat_literal_suffix(cursor);
+            TokenKind::Literal { kind: literal_kind, suffix_start }
+        }
+
+        // One-symbol tokens.
+        ';' => Semi,
+        ',' => Comma,
+        '.' => Dot,
+        '(' => OpenParen,
+        ')' => CloseParen,
+        '{' => OpenBrace,
+        '}' => CloseBrace,
+        '[' => OpenBracket,
+        ']' => CloseBracket,
+        '@' => At,
+        '#' => Pound,
+        '~' => Tilde,
+        '?' => Question,
+        ':' => Colon,
+        '$' => Dollar,
+        '=' => Eq,
+        '!' => Bang,
+        '<' => Lt,
+        '>' => Gt,
+        '-' => Minus,
+        '&' => And,
+        '|' => Or,
+        '+' => Plus,
+        '*' => Star,
+        '^' => Caret,
+        '%' => Percent,
+
+        // Lifetime or character literal.
+        '\'' => lifetime_or_char(cursor),
+
+        // String literal.
+        '"' => {
+            let terminated = double_quoted_string(cursor);
+            let suffix_start = cursor.len_consumed();
+            if terminated {
+                eat_literal_suffix(cursor);
+            }
+            let kind = LiteralKind::Str { terminated };
+            Literal { kind, suffix_start }
+        }
+        _ => Unknown,
+    };
+    Token::new(token_kind, cursor.len_consumed())
+}
 
-    fn line_comment(&mut self) -> TokenKind {
-        debug_assert!(self.prev() == '/' && self.first() == '/');
-        self.bump();
+fn line_comment(cursor: &mut Cursor) -> TokenKind {
+    debug_assert!(cursor.prev() == '/' && cursor.first() == '/');
+    cursor.bump();
 
-        let doc_style = match self.first() {
-            // `//!` is an inner line doc comment.
-            '!' => Some(DocStyle::Inner),
-            // `////` (more than 3 slashes) is not considered a doc comment.
-            '/' if self.second() != '/' => Some(DocStyle::Outer),
-            _ => None,
-        };
+    let doc_style = match cursor.first() {
+        // `//!` is an inner line doc comment.
+        '!' => Some(DocStyle::Inner),
+        // `////` (more than 3 slashes) is not considered a doc comment.
+        '/' if cursor.second() != '/' => Some(DocStyle::Outer),
+        _ => None,
+    };
 
-        self.eat_while(|c| c != '\n');
-        LineComment { doc_style }
-    }
+    cursor.eat_while(|c| c != '\n');
+    LineComment { doc_style }
+}
 
-    fn block_comment(&mut self) -> TokenKind {
-        debug_assert!(self.prev() == '/' && self.first() == '*');
-        self.bump();
-
-        let doc_style = match self.first() {
-            // `/*!` is an inner block doc comment.
-            '!' => Some(DocStyle::Inner),
-            // `/***` (more than 2 stars) is not considered a doc comment.
-            // `/**/` is not considered a doc comment.
-            '*' if !matches!(self.second(), '*' | '/') => Some(DocStyle::Outer),
-            _ => None,
-        };
-
-        let mut depth = 1usize;
-        while let Some(c) = self.bump() {
-            match c {
-                '/' if self.first() == '*' => {
-                    self.bump();
-                    depth += 1;
-                }
-                '*' if self.first() == '/' => {
-                    self.bump();
-                    depth -= 1;
-                    if depth == 0 {
-                        // This block comment is closed, so for a construction like "/* */ */"
-                        // there will be a successfully parsed block comment "/* */"
-                        // and " */" will be processed separately.
-                        break;
-                    }
+fn block_comment(cursor: &mut Cursor) -> TokenKind {
+    debug_assert!(cursor.prev() == '/' && cursor.first() == '*');
+    cursor.bump();
+
+    let doc_style = match cursor.first() {
+        // `/*!` is an inner block doc comment.
+        '!' => Some(DocStyle::Inner),
+        // `/***` (more than 2 stars) is not considered a doc comment.
+        // `/**/` is not considered a doc comment.
+        '*' if !matches!(cursor.second(), '*' | '/') => Some(DocStyle::Outer),
+        _ => None,
+    };
+
+    let mut depth = 1usize;
+    while let Some(c) = cursor.bump() {
+        match c {
+            '/' if cursor.first() == '*' => {
+                cursor.bump();
+                depth += 1;
+            }
+            '*' if cursor.first() == '/' => {
+                cursor.bump();
+                depth -= 1;
+                if depth == 0 {
+                    // This block comment is closed, so for a construction like "/* */ */"
+                    // there will be a successfully parsed block comment "/* */"
+                    // and " */" will be processed separately.
+                    break;
                 }
-                _ => (),
             }
+            _ => (),
         }
-
-        BlockComment { doc_style, terminated: depth == 0 }
     }
 
-    fn whitespace(&mut self) -> TokenKind {
-        debug_assert!(is_whitespace(self.prev()));
-        self.eat_while(is_whitespace);
-        Whitespace
-    }
+    BlockComment { doc_style, terminated: depth == 0 }
+}
 
-    fn raw_ident(&mut self) -> TokenKind {
-        debug_assert!(self.prev() == 'r' && self.first() == '#' && is_id_start(self.second()));
-        // Eat "#" symbol.
-        self.bump();
-        // Eat the identifier part of RawIdent.
-        self.eat_identifier();
-        RawIdent
-    }
+fn whitespace(cursor: &mut Cursor) -> TokenKind {
+    debug_assert!(is_whitespace(cursor.prev()));
+    cursor.eat_while(is_whitespace);
+    Whitespace
+}
 
-    fn ident(&mut self) -> TokenKind {
-        debug_assert!(is_id_start(self.prev()));
-        // Start is already eaten, eat the rest of identifier.
-        self.eat_while(is_id_continue);
-        Ident
-    }
+fn raw_ident(cursor: &mut Cursor) -> TokenKind {
+    debug_assert!(cursor.prev() == 'r' && cursor.first() == '#' && is_id_start(cursor.second()));
+    // Eat "#" symbol.
+    cursor.bump();
+    // Eat the identifier part of RawIdent.
+    eat_identifier(cursor);
+    RawIdent
+}
 
-    /// Eats one identifier.
-    fn eat_identifier(&mut self) {
-        if !is_id_start(self.first()) {
-            return;
-        }
-        self.bump();
+fn ident(cursor: &mut Cursor) -> TokenKind {
+    debug_assert!(is_id_start(cursor.prev()));
+    // Start is already eaten, eat the rest of identifier.
+    cursor.eat_while(is_id_continue);
+    Ident
+}
 
-        self.eat_while(is_id_continue);
+/// Eats one identifier.
+pub(crate) fn eat_identifier(cursor: &mut Cursor) {
+    if !is_id_start(cursor.first()) {
+        return;
     }
+    cursor.bump();
+
+    cursor.eat_while(is_id_continue);
 }
diff --git a/compiler/rustc_lexer/src/literals.rs b/compiler/rustc_lexer/src/literals.rs
index 401b4a5df653d..02adbd77cb3df 100644
--- a/compiler/rustc_lexer/src/literals.rs
+++ b/compiler/rustc_lexer/src/literals.rs
@@ -1,5 +1,5 @@
 use crate::cursor::{Cursor, EOF_CHAR};
-use crate::{is_id_continue, is_id_start, TokenKind};
+use crate::{is_id_continue, is_id_start, TokenKind, eat_identifier};
 use std::convert::TryFrom;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -354,5 +354,5 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
 
 /// Eats the suffix of a literal, e.g. "_u8".
 pub(crate) fn eat_literal_suffix(cursor: &mut Cursor) {
-    cursor.eat_identifier();
+    eat_identifier(cursor);
 }

From 629e161527a2edc0cbf2e2fa9cbe1f069309e230 Mon Sep 17 00:00:00 2001
From: Julian Wollersberger <julian.wollersberger@gmx.at>
Date: Wed, 3 Mar 2021 23:37:50 +0100
Subject: [PATCH 4/6] Simplified `cursor.rs` a bit and renamed `first` to
 `peek` and `eat_while` to `bump_while`.

---
 compiler/rustc_lexer/src/cursor.rs   | 28 +++++++------------
 compiler/rustc_lexer/src/lib.rs      | 34 +++++++++++------------
 compiler/rustc_lexer/src/literals.rs | 40 ++++++++++++++--------------
 3 files changed, 46 insertions(+), 56 deletions(-)

diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index e188102c8e580..5110d7a109aaa 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -2,7 +2,7 @@ use std::str::Chars;
 
 /// Peekable iterator over a char sequence.
 ///
-/// Next characters can be peeked via `nth_char` method,
+/// Next characters can be peeked via `peek` method,
 /// and position can be shifted forward via `bump` method.
 pub(crate) struct Cursor<'a> {
     initial_len: usize,
@@ -37,22 +37,17 @@ impl<'a> Cursor<'a> {
         }
     }
 
-    /// Returns nth character relative to the current cursor position.
-    /// If requested position doesn't exist, `EOF_CHAR` is returned.
+    /// Peeks the next symbol from the input stream without consuming it.
+    /// If it doesn't exist, `EOF_CHAR` is returned.
     /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
     /// it should be checked with `is_eof` method.
-    fn nth_char(&self, n: usize) -> char {
-        self.chars().nth(n).unwrap_or(EOF_CHAR)
-    }
-
-    /// Peeks the next symbol from the input stream without consuming it.
-    pub(crate) fn first(&self) -> char {
-        self.nth_char(0)
+    pub(crate) fn peek(&self) -> char {
+        self.chars.clone().nth(0).unwrap_or(EOF_CHAR)
     }
 
     /// Peeks the second symbol from the input stream without consuming it.
-    pub(crate) fn second(&self) -> char {
-        self.nth_char(1)
+    pub(crate) fn peek_second(&self) -> char {
+        self.chars.clone().nth(1).unwrap_or(EOF_CHAR)
     }
 
     /// Checks if there is nothing more to consume.
@@ -65,11 +60,6 @@ impl<'a> Cursor<'a> {
         self.initial_len - self.chars.as_str().len()
     }
 
-    /// Returns a `Chars` iterator over the remaining characters.
-    fn chars(&self) -> Chars<'a> {
-        self.chars.clone()
-    }
-
     /// Moves to the next character.
     pub(crate) fn bump(&mut self) -> Option<char> {
         let c = self.chars.next()?;
@@ -83,8 +73,8 @@ impl<'a> Cursor<'a> {
     }
 
     /// Eats symbols while predicate returns true or until the end of file is reached.
-    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
-        while predicate(self.first()) && !self.is_eof() {
+    pub(crate) fn bump_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
+        while predicate(self.peek()) && !self.is_eof() {
             self.bump();
         }
     }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index a05a053324519..2f934f16a7cd4 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -255,7 +255,7 @@ fn advance_token(cursor: &mut Cursor) -> Token {
     let first_char = cursor.bump().unwrap();
     let token_kind = match first_char {
         // Slash, comment or block comment.
-        '/' => match cursor.first() {
+        '/' => match cursor.peek() {
             '/' => line_comment(cursor),
             '*' => block_comment(cursor),
             _ => Slash,
@@ -265,7 +265,7 @@ fn advance_token(cursor: &mut Cursor) -> Token {
         c if is_whitespace(c) => whitespace(cursor),
 
         // Raw identifier, raw string literal or identifier.
-        'r' => match (cursor.first(), cursor.second()) {
+        'r' => match (cursor.peek(), cursor.peek_second()) {
             ('#', c1) if is_id_start(c1) => raw_ident(cursor),
             ('#', _) | ('"', _) => {
                 let (n_hashes, err) = raw_double_quoted_string(cursor, 1);
@@ -280,7 +280,7 @@ fn advance_token(cursor: &mut Cursor) -> Token {
         },
 
         // Byte literal, byte string literal, raw byte string literal or identifier.
-        'b' => match (cursor.first(), cursor.second()) {
+        'b' => match (cursor.peek(), cursor.peek_second()) {
             ('\'', _) => {
                 cursor.bump();
                 let terminated = single_quoted_string(cursor);
@@ -373,42 +373,42 @@ fn advance_token(cursor: &mut Cursor) -> Token {
 }
 
 fn line_comment(cursor: &mut Cursor) -> TokenKind {
-    debug_assert!(cursor.prev() == '/' && cursor.first() == '/');
+    debug_assert!(cursor.prev() == '/' && cursor.peek() == '/');
     cursor.bump();
 
-    let doc_style = match cursor.first() {
+    let doc_style = match cursor.peek() {
         // `//!` is an inner line doc comment.
         '!' => Some(DocStyle::Inner),
         // `////` (more than 3 slashes) is not considered a doc comment.
-        '/' if cursor.second() != '/' => Some(DocStyle::Outer),
+        '/' if cursor.peek_second() != '/' => Some(DocStyle::Outer),
         _ => None,
     };
 
-    cursor.eat_while(|c| c != '\n');
+    cursor.bump_while(|c| c != '\n');
     LineComment { doc_style }
 }
 
 fn block_comment(cursor: &mut Cursor) -> TokenKind {
-    debug_assert!(cursor.prev() == '/' && cursor.first() == '*');
+    debug_assert!(cursor.prev() == '/' && cursor.peek() == '*');
     cursor.bump();
 
-    let doc_style = match cursor.first() {
+    let doc_style = match cursor.peek() {
         // `/*!` is an inner block doc comment.
         '!' => Some(DocStyle::Inner),
         // `/***` (more than 2 stars) is not considered a doc comment.
         // `/**/` is not considered a doc comment.
-        '*' if !matches!(cursor.second(), '*' | '/') => Some(DocStyle::Outer),
+        '*' if !matches!(cursor.peek_second(), '*' | '/') => Some(DocStyle::Outer),
         _ => None,
     };
 
     let mut depth = 1usize;
     while let Some(c) = cursor.bump() {
         match c {
-            '/' if cursor.first() == '*' => {
+            '/' if cursor.peek() == '*' => {
                 cursor.bump();
                 depth += 1;
             }
-            '*' if cursor.first() == '/' => {
+            '*' if cursor.peek() == '/' => {
                 cursor.bump();
                 depth -= 1;
                 if depth == 0 {
@@ -427,12 +427,12 @@ fn block_comment(cursor: &mut Cursor) -> TokenKind {
 
 fn whitespace(cursor: &mut Cursor) -> TokenKind {
     debug_assert!(is_whitespace(cursor.prev()));
-    cursor.eat_while(is_whitespace);
+    cursor.bump_while(is_whitespace);
     Whitespace
 }
 
 fn raw_ident(cursor: &mut Cursor) -> TokenKind {
-    debug_assert!(cursor.prev() == 'r' && cursor.first() == '#' && is_id_start(cursor.second()));
+    debug_assert!(cursor.prev() == 'r' && cursor.peek() == '#' && is_id_start(cursor.peek_second()));
     // Eat "#" symbol.
     cursor.bump();
     // Eat the identifier part of RawIdent.
@@ -443,16 +443,16 @@ fn raw_ident(cursor: &mut Cursor) -> TokenKind {
 fn ident(cursor: &mut Cursor) -> TokenKind {
     debug_assert!(is_id_start(cursor.prev()));
     // Start is already eaten, eat the rest of identifier.
-    cursor.eat_while(is_id_continue);
+    cursor.bump_while(is_id_continue);
     Ident
 }
 
 /// Eats one identifier.
 pub(crate) fn eat_identifier(cursor: &mut Cursor) {
-    if !is_id_start(cursor.first()) {
+    if !is_id_start(cursor.peek()) {
         return;
     }
     cursor.bump();
 
-    cursor.eat_while(is_id_continue);
+    cursor.bump_while(is_id_continue);
 }
diff --git a/compiler/rustc_lexer/src/literals.rs b/compiler/rustc_lexer/src/literals.rs
index 02adbd77cb3df..daf231bceb39e 100644
--- a/compiler/rustc_lexer/src/literals.rs
+++ b/compiler/rustc_lexer/src/literals.rs
@@ -56,7 +56,7 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
     let mut base = Base::Decimal;
     if first_digit == '0' {
         // Attempt to parse encoding base.
-        let has_digits = match cursor.first() {
+        let has_digits = match cursor.peek() {
             'b' => {
                 base = Base::Binary;
                 cursor.bump();
@@ -90,18 +90,18 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
         eat_decimal_digits(cursor);
     };
 
-    match cursor.first() {
+    match cursor.peek() {
         // Don't be greedy if this is actually an
         // integer literal followed by field/method access or a range pattern
         // (`0..2` and `12.foo()`)
-        '.' if cursor.second() != '.' && !is_id_start(cursor.second()) => {
+        '.' if cursor.peek_second() != '.' && !is_id_start(cursor.peek_second()) => {
             // might have stuff after the ., and if it does, it needs to start
             // with a number
             cursor.bump();
             let mut empty_exponent = false;
-            if cursor.first().is_digit(10) {
+            if cursor.peek().is_digit(10) {
                 eat_decimal_digits(cursor);
-                match cursor.first() {
+                match cursor.peek() {
                     'e' | 'E' => {
                         cursor.bump();
                         empty_exponent = !eat_float_exponent(cursor);
@@ -123,7 +123,7 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
 pub(crate) fn eat_decimal_digits(cursor: &mut Cursor) -> bool {
     let mut has_digits = false;
     loop {
-        match cursor.first() {
+        match cursor.peek() {
             '_' => {
                 cursor.bump();
             }
@@ -140,7 +140,7 @@ pub(crate) fn eat_decimal_digits(cursor: &mut Cursor) -> bool {
 pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor) -> bool {
     let mut has_digits = false;
     loop {
-        match cursor.first() {
+        match cursor.peek() {
             '_' => {
                 cursor.bump();
             }
@@ -158,7 +158,7 @@ pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor) -> bool {
 /// and returns false otherwise.
 fn eat_float_exponent(cursor: &mut Cursor) -> bool {
     debug_assert!(cursor.prev() == 'e' || cursor.prev() == 'E');
-    if cursor.first() == '-' || cursor.first() == '+' {
+    if cursor.peek() == '-' || cursor.peek() == '+' {
         cursor.bump();
     }
     eat_decimal_digits(cursor)
@@ -167,14 +167,14 @@ fn eat_float_exponent(cursor: &mut Cursor) -> bool {
 pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
     debug_assert!(cursor.prev() == '\'');
 
-    let can_be_a_lifetime = if cursor.second() == '\'' {
+    let can_be_a_lifetime = if cursor.peek_second() == '\'' {
         // It's surely not a lifetime.
         false
     } else {
         // If the first symbol is valid for identifier, it can be a lifetime.
         // Also check if it's a number for a better error reporting (so '0 will
         // be reported as invalid lifetime and not as unterminated char literal).
-        is_id_start(cursor.first()) || cursor.first().is_digit(10)
+        is_id_start(cursor.peek()) || cursor.peek().is_digit(10)
     };
 
     if !can_be_a_lifetime {
@@ -190,18 +190,18 @@ pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
     // Either a lifetime or a character literal with
     // length greater than 1.
 
-    let starts_with_number = cursor.first().is_digit(10);
+    let starts_with_number = cursor.peek().is_digit(10);
 
     // Skip the literal contents.
     // First symbol can be a number (which isn't a valid identifier start),
     // so skip it without any checks.
     cursor.bump();
-    cursor.eat_while(is_id_continue);
+    cursor.bump_while(is_id_continue);
 
     // Check if after skipping literal contents we've met a closing
     // single quote (which means that user attempted to create a
     // string with single quotes).
-    if cursor.first() == '\'' {
+    if cursor.peek() == '\'' {
         cursor.bump();
         let kind = LiteralKind::Char { terminated: true };
         TokenKind::Literal { kind, suffix_start: cursor.len_consumed() }
@@ -213,7 +213,7 @@ pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
 pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
     debug_assert!(cursor.prev() == '\'');
     // Check if it's a one-symbol literal.
-    if cursor.second() == '\'' && cursor.first() != '\\' {
+    if cursor.peek_second() == '\'' && cursor.peek() != '\\' {
         cursor.bump();
         cursor.bump();
         return true;
@@ -223,7 +223,7 @@ pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
 
     // Parse until either quotes are terminated or error is detected.
     loop {
-        match cursor.first() {
+        match cursor.peek() {
             // Quotes are terminated, finish parsing.
             '\'' => {
                 cursor.bump();
@@ -233,7 +233,7 @@ pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
             // to the error report.
             '/' => break,
             // Newline without following '\'' means unclosed quote, stop parsing.
-            '\n' if cursor.second() != '\'' => break,
+            '\n' if cursor.peek_second() != '\'' => break,
             // End of file, stop parsing.
             EOF_CHAR if cursor.is_eof() => break,
             // Escaped slash is considered one character, so bump twice.
@@ -260,7 +260,7 @@ pub(crate) fn double_quoted_string(cursor: &mut Cursor) -> bool {
             '"' => {
                 return true;
             }
-            '\\' if cursor.first() == '\\' || cursor.first() == '"' => {
+            '\\' if cursor.peek() == '\\' || cursor.peek() == '"' => {
                 // Bump again to skip escaped character.
                 cursor.bump();
             }
@@ -295,7 +295,7 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
 
     // Count opening '#' symbols.
     let mut eaten = 0;
-    while cursor.first() == '#' {
+    while cursor.peek() == '#' {
         eaten += 1;
         cursor.bump();
     }
@@ -313,7 +313,7 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
     // Skip the string contents and on each '#' character met, check if this is
     // a raw string termination.
     loop {
-        cursor.eat_while(|c| c != '"');
+        cursor.bump_while(|c| c != '"');
 
         if cursor.is_eof() {
             return (
@@ -335,7 +335,7 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
         // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
         // followed by a `#` token.
         let mut n_end_hashes = 0;
-        while cursor.first() == '#' && n_end_hashes < n_start_hashes {
+        while cursor.peek() == '#' && n_end_hashes < n_start_hashes {
             n_end_hashes += 1;
             cursor.bump();
         }

From d4336a52a2b18a844c844aab5f54efff60053502 Mon Sep 17 00:00:00 2001
From: Julian Wollersberger <julian.wollersberger@gmx.at>
Date: Thu, 4 Mar 2021 10:14:11 +0100
Subject: [PATCH 5/6] Inline some helper functions that are only used once, to
 lower the number of things one needs to keep in mind. Also fix the imports
 in tests.rs.

---
 compiler/rustc_lexer/src/lib.rs      | 60 ++++++++++------------------
 compiler/rustc_lexer/src/literals.rs | 11 +++--
 compiler/rustc_lexer/src/tests.rs    |  9 +++--
 3 files changed, 35 insertions(+), 45 deletions(-)

diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 2f934f16a7cd4..9ad70ce8b7623 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -28,11 +28,13 @@ pub mod unescape;
 #[cfg(test)]
 mod tests;
 
+pub use crate::literals::{Base, LiteralKind, RawStrError};
+
 use self::TokenKind::*;
 use crate::cursor::Cursor;
 use crate::literals::{
     double_quoted_string, eat_literal_suffix, lifetime_or_char, number, raw_double_quoted_string,
-    single_quoted_string, LiteralKind,
+    single_quoted_string,
 };
 
 /// Parsed token.
@@ -165,12 +167,6 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
     None
 }
 
-/// Parses the first token from the provided input string.
-pub fn first_token(input: &str) -> Token {
-    debug_assert!(!input.is_empty());
-    advance_token(&mut Cursor::new(input))
-}
-
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ {
     std::iter::from_fn(move || {
@@ -250,8 +246,11 @@ pub fn is_ident(string: &str) -> bool {
     }
 }
 
-/// Parses a token from the input string.
-fn advance_token(cursor: &mut Cursor) -> Token {
+/// Parses the first token from the provided input string.
+pub fn first_token(input: &str) -> Token {
+    debug_assert!(!input.is_empty());
+    let cursor = &mut Cursor::new(input);
+
     let first_char = cursor.bump().unwrap();
     let token_kind = match first_char {
         // Slash, comment or block comment.
@@ -262,11 +261,21 @@ fn advance_token(cursor: &mut Cursor) -> Token {
         },
 
         // Whitespace sequence.
-        c if is_whitespace(c) => whitespace(cursor),
+        c if is_whitespace(c) => {
+            cursor.bump_while(is_whitespace);
+            Whitespace
+        }
 
         // Raw identifier, raw string literal or identifier.
         'r' => match (cursor.peek(), cursor.peek_second()) {
-            ('#', c1) if is_id_start(c1) => raw_ident(cursor),
+            ('#', c1) if is_id_start(c1) => {
+                // Eat "#" symbol.
+                cursor.bump();
+                // Eat the identifier part of RawIdent.
+                cursor.bump();
+                ident(cursor);
+                RawIdent
+            }
             ('#', _) | ('"', _) => {
                 let (n_hashes, err) = raw_double_quoted_string(cursor, 1);
                 let suffix_start = cursor.len_consumed();
@@ -425,34 +434,9 @@ fn block_comment(cursor: &mut Cursor) -> TokenKind {
     BlockComment { doc_style, terminated: depth == 0 }
 }
 
-fn whitespace(cursor: &mut Cursor) -> TokenKind {
-    debug_assert!(is_whitespace(cursor.prev()));
-    cursor.bump_while(is_whitespace);
-    Whitespace
-}
-
-fn raw_ident(cursor: &mut Cursor) -> TokenKind {
-    debug_assert!(cursor.prev() == 'r' && cursor.peek() == '#' && is_id_start(cursor.peek_second()));
-    // Eat "#" symbol.
-    cursor.bump();
-    // Eat the identifier part of RawIdent.
-    eat_identifier(cursor);
-    RawIdent
-}
-
-fn ident(cursor: &mut Cursor) -> TokenKind {
+/// Start is already eaten, eat the rest of identifier.
+pub(crate) fn ident(cursor: &mut Cursor) -> TokenKind {
     debug_assert!(is_id_start(cursor.prev()));
-    // Start is already eaten, eat the rest of identifier.
     cursor.bump_while(is_id_continue);
     Ident
 }
-
-/// Eats one identifier.
-pub(crate) fn eat_identifier(cursor: &mut Cursor) {
-    if !is_id_start(cursor.peek()) {
-        return;
-    }
-    cursor.bump();
-
-    cursor.bump_while(is_id_continue);
-}
diff --git a/compiler/rustc_lexer/src/literals.rs b/compiler/rustc_lexer/src/literals.rs
index daf231bceb39e..5aa0bac31681e 100644
--- a/compiler/rustc_lexer/src/literals.rs
+++ b/compiler/rustc_lexer/src/literals.rs
@@ -1,5 +1,5 @@
 use crate::cursor::{Cursor, EOF_CHAR};
-use crate::{is_id_continue, is_id_start, TokenKind, eat_identifier};
+use crate::{ident, is_id_continue, is_id_start, TokenKind};
 use std::convert::TryFrom;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -279,7 +279,8 @@ pub(crate) fn raw_double_quoted_string(
     // Wrap the actual function to handle the error with too many hashes.
     // This way, it eats the whole raw string.
     let (n_hashes, err) = raw_string_unvalidated(cursor, prefix_len);
-    // Only up to 65535 `#`s are allowed in raw strings
+
+    // Only up to 65535 `#`s are allowed in raw strings.
     match u16::try_from(n_hashes) {
         Ok(num) => (num, err),
         // We lie about the number of hashes here :P
@@ -354,5 +355,9 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
 
 /// Eats the suffix of a literal, e.g. "_u8".
 pub(crate) fn eat_literal_suffix(cursor: &mut Cursor) {
-    eat_identifier(cursor);
+    // Eats one identifier.
+    if is_id_start(cursor.peek()) {
+        cursor.bump();
+        ident(cursor);
+    }
 }
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index 94017b7b286e2..8f1a968526609 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -1,12 +1,13 @@
-use super::*;
-
+use crate::cursor::Cursor;
+use crate::literals::{raw_double_quoted_string, RawStrError};
+use crate::{strip_shebang, tokenize};
 use expect_test::{expect, Expect};
 
 fn check_raw_str(s: &str, expected_hashes: u16, expected_err: Option<RawStrError>) {
     let s = &format!("r{}", s);
-    let mut cursor = Cursor::new(s);
+    let cursor = &mut Cursor::new(s);
     cursor.bump();
-    let (n_hashes, err) = cursor.raw_double_quoted_string(0);
+    let (n_hashes, err) = raw_double_quoted_string(cursor, 0);
     assert_eq!(n_hashes, expected_hashes);
     assert_eq!(err, expected_err);
 }

From 7149a21c54730d21185fecf7a5cd324c7b555056 Mon Sep 17 00:00:00 2001
From: Julian Wollersberger <julian.wollersberger@gmx.at>
Date: Thu, 4 Mar 2021 11:13:17 +0100
Subject: [PATCH 6/6] Address the "Hidden lifetime in path" warning. This one
 wasn't shown with `cargo check`.

---
 compiler/rustc_lexer/src/lib.rs      |  6 +++---
 compiler/rustc_lexer/src/literals.rs | 23 +++++++++++++----------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 9ad70ce8b7623..dacda8b3ca7a0 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -381,7 +381,7 @@ pub fn first_token(input: &str) -> Token {
     Token::new(token_kind, cursor.len_consumed())
 }
 
-fn line_comment(cursor: &mut Cursor) -> TokenKind {
+fn line_comment(cursor: &mut Cursor<'_>) -> TokenKind {
     debug_assert!(cursor.prev() == '/' && cursor.peek() == '/');
     cursor.bump();
 
@@ -397,7 +397,7 @@ fn line_comment(cursor: &mut Cursor) -> TokenKind {
     LineComment { doc_style }
 }
 
-fn block_comment(cursor: &mut Cursor) -> TokenKind {
+fn block_comment(cursor: &mut Cursor<'_>) -> TokenKind {
     debug_assert!(cursor.prev() == '/' && cursor.peek() == '*');
     cursor.bump();
 
@@ -435,7 +435,7 @@ fn block_comment(cursor: &mut Cursor) -> TokenKind {
 }
 
 /// Start is already eaten, eat the rest of identifier.
-pub(crate) fn ident(cursor: &mut Cursor) -> TokenKind {
+pub(crate) fn ident(cursor: &mut Cursor<'_>) -> TokenKind {
     debug_assert!(is_id_start(cursor.prev()));
     cursor.bump_while(is_id_continue);
     Ident
diff --git a/compiler/rustc_lexer/src/literals.rs b/compiler/rustc_lexer/src/literals.rs
index 5aa0bac31681e..33792622bec57 100644
--- a/compiler/rustc_lexer/src/literals.rs
+++ b/compiler/rustc_lexer/src/literals.rs
@@ -51,7 +51,7 @@ pub enum RawStrError {
     TooManyDelimiters { found: usize },
 }
 
-pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
+pub(crate) fn number(cursor: &mut Cursor<'_>, first_digit: char) -> LiteralKind {
     debug_assert!('0' <= cursor.prev() && cursor.prev() <= '9');
     let mut base = Base::Decimal;
     if first_digit == '0' {
@@ -120,7 +120,7 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
     }
 }
 
-pub(crate) fn eat_decimal_digits(cursor: &mut Cursor) -> bool {
+pub(crate) fn eat_decimal_digits(cursor: &mut Cursor<'_>) -> bool {
     let mut has_digits = false;
     loop {
         match cursor.peek() {
@@ -137,7 +137,7 @@ pub(crate) fn eat_decimal_digits(cursor: &mut Cursor) -> bool {
     has_digits
 }
 
-pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor) -> bool {
+pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor<'_>) -> bool {
     let mut has_digits = false;
     loop {
         match cursor.peek() {
@@ -156,7 +156,7 @@ pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor) -> bool {
 
 /// Eats the float exponent. Returns true if at least one digit was met,
 /// and returns false otherwise.
-fn eat_float_exponent(cursor: &mut Cursor) -> bool {
+fn eat_float_exponent(cursor: &mut Cursor<'_>) -> bool {
     debug_assert!(cursor.prev() == 'e' || cursor.prev() == 'E');
     if cursor.peek() == '-' || cursor.peek() == '+' {
         cursor.bump();
@@ -164,7 +164,7 @@ fn eat_float_exponent(cursor: &mut Cursor) -> bool {
     eat_decimal_digits(cursor)
 }
 
-pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
+pub(crate) fn lifetime_or_char(cursor: &mut Cursor<'_>) -> TokenKind {
     debug_assert!(cursor.prev() == '\'');
 
     let can_be_a_lifetime = if cursor.peek_second() == '\'' {
@@ -210,7 +210,7 @@ pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
     }
 }
 
-pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
+pub(crate) fn single_quoted_string(cursor: &mut Cursor<'_>) -> bool {
     debug_assert!(cursor.prev() == '\'');
     // Check if it's a one-symbol literal.
     if cursor.peek_second() == '\'' && cursor.peek() != '\\' {
@@ -253,7 +253,7 @@ pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
 
 /// Eats double-quoted string and returns true
 /// if string is terminated.
-pub(crate) fn double_quoted_string(cursor: &mut Cursor) -> bool {
+pub(crate) fn double_quoted_string(cursor: &mut Cursor<'_>) -> bool {
     debug_assert!(cursor.prev() == '"');
     while let Some(c) = cursor.bump() {
         match c {
@@ -273,7 +273,7 @@ pub(crate) fn double_quoted_string(cursor: &mut Cursor) -> bool {
 
 /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
 pub(crate) fn raw_double_quoted_string(
-    cursor: &mut Cursor,
+    cursor: &mut Cursor<'_>,
     prefix_len: usize,
 ) -> (u16, Option<RawStrError>) {
     // Wrap the actual function to handle the error with too many hashes.
@@ -288,7 +288,10 @@ pub(crate) fn raw_double_quoted_string(
     }
 }
 
-fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Option<RawStrError>) {
+fn raw_string_unvalidated(
+    cursor: &mut Cursor<'_>,
+    prefix_len: usize,
+) -> (usize, Option<RawStrError>) {
     debug_assert!(cursor.prev() == 'r');
     let start_pos = cursor.len_consumed();
     let mut possible_terminator_offset = None;
@@ -354,7 +357,7 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
 }
 
 /// Eats the suffix of a literal, e.g. "_u8".
-pub(crate) fn eat_literal_suffix(cursor: &mut Cursor) {
+pub(crate) fn eat_literal_suffix(cursor: &mut Cursor<'_>) {
     // Eats one identifier.
     if is_id_start(cursor.peek()) {
         cursor.bump();