diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index 297f3d19ca178..5110d7a109aaa 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -2,7 +2,7 @@ use std::str::Chars;
 
 /// Peekable iterator over a char sequence.
 ///
-/// Next characters can be peeked via `nth_char` method,
+/// Next characters can be peeked via the `peek` and `peek_second` methods,
 /// and position can be shifted forward via `bump` method.
 pub(crate) struct Cursor<'a> {
     initial_len: usize,
@@ -37,22 +37,17 @@ impl<'a> Cursor<'a> {
         }
     }
 
-    /// Returns nth character relative to the current cursor position.
-    /// If requested position doesn't exist, `EOF_CHAR` is returned.
+    /// Peeks the next symbol from the input stream without consuming it.
+    /// If it doesn't exist, `EOF_CHAR` is returned.
     /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
     /// it should be checked with `is_eof` method.
-    fn nth_char(&self, n: usize) -> char {
-        self.chars().nth(n).unwrap_or(EOF_CHAR)
-    }
-
-    /// Peeks the next symbol from the input stream without consuming it.
-    pub(crate) fn first(&self) -> char {
-        self.nth_char(0)
+    pub(crate) fn peek(&self) -> char {
+        self.chars.clone().next().unwrap_or(EOF_CHAR)
     }
 
     /// Peeks the second symbol from the input stream without consuming it.
-    pub(crate) fn second(&self) -> char {
-        self.nth_char(1)
+    pub(crate) fn peek_second(&self) -> char {
+        self.chars.clone().nth(1).unwrap_or(EOF_CHAR)
     }
 
     /// Checks if there is nothing more to consume.
@@ -65,11 +60,6 @@ impl<'a> Cursor<'a> {
         self.initial_len - self.chars.as_str().len()
     }
 
-    /// Returns a `Chars` iterator over the remaining characters.
-    fn chars(&self) -> Chars<'a> {
-        self.chars.clone()
-    }
-
     /// Moves to the next character.
     pub(crate) fn bump(&mut self) -> Option<char> {
         let c = self.chars.next()?;
@@ -81,4 +71,11 @@ impl<'a> Cursor<'a> {
 
         Some(c)
     }
+
+    /// Bumps the cursor while `predicate` accepts the peeked char and `is_eof` is false.
+    pub(crate) fn bump_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
+        while predicate(self.peek()) && !self.is_eof() {
+            self.bump();
+        }
+    }
 }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 44fc4db7dc199..dacda8b3ca7a0 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -22,15 +22,20 @@
 // `#![feature]` attributes should be added.
 
 mod cursor;
+mod literals;
 pub mod unescape;
 
 #[cfg(test)]
 mod tests;
 
-use self::LiteralKind::*;
+pub use crate::literals::{Base, LiteralKind, RawStrError};
+
 use self::TokenKind::*;
-use crate::cursor::{Cursor, EOF_CHAR};
-use std::convert::TryFrom;
+use crate::cursor::Cursor;
+use crate::literals::{
+    double_quoted_string, eat_literal_suffix, lifetime_or_char, number, raw_double_quoted_string,
+    single_quoted_string,
+};
 
 /// Parsed token.
 /// It doesn't contain information about data that has been parsed,
@@ -137,55 +142,6 @@ pub enum DocStyle {
     Inner,
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub enum LiteralKind {
-    /// "12_u8", "0o100", "0b120i99"
-    Int { base: Base, empty_int: bool },
-    /// "12.34f32", "0b100.100"
-    Float { base: Base, empty_exponent: bool },
-    /// "'a'", "'\\'", "'''", "';"
-    Char { terminated: bool },
-    /// "b'a'", "b'\\'", "b'''", "b';"
-    Byte { terminated: bool },
-    /// ""abc"", ""abc"
-    Str { terminated: bool },
-    /// "b"abc"", "b"abc"
-    ByteStr { terminated: bool },
-    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
-    RawStr { n_hashes: u16, err: Option<RawStrError> },
-    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
-    RawByteStr { n_hashes: u16, err: Option<RawStrError> },
-}
-
-/// Error produced validating a raw string. Represents cases like:
-/// - `r##~"abcde"##`: `InvalidStarter`
-/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
-/// - Too many `#`s (>65535): `TooManyDelimiters`
-// perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub enum RawStrError {
-    /// Non `#` characters exist between `r` and `"` eg. `r#~"..`
-    InvalidStarter { bad_char: char },
-    /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
-    /// may have intended to terminate it.
-    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
-    /// More than 65535 `#`s exist.
-    TooManyDelimiters { found: usize },
-}
-
-/// Base of numeric literal encoding according to its prefix.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub enum Base {
-    /// Literal starts with "0b".
-    Binary,
-    /// Literal starts with "0o".
-    Octal,
-    /// Literal starts with "0x".
-    Hexadecimal,
-    /// Literal doesn't contain a prefix.
-    Decimal,
-}
-
 /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
 /// but shebang isn't a part of rust syntax.
 pub fn strip_shebang(input: &str) -> Option<usize> {
@@ -211,12 +167,6 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
     None
 }
 
-/// Parses the first token from the provided input string.
-pub fn first_token(input: &str) -> Token {
-    debug_assert!(!input.is_empty());
-    Cursor::new(input).advance_token()
-}
-
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ {
     std::iter::from_fn(move || {
@@ -296,521 +246,197 @@ pub fn is_ident(string: &str) -> bool {
     }
 }
 
-impl Cursor<'_> {
-    /// Parses a token from the input string.
-    fn advance_token(&mut self) -> Token {
-        let first_char = self.bump().unwrap();
-        let token_kind = match first_char {
-            // Slash, comment or block comment.
-            '/' => match self.first() {
-                '/' => self.line_comment(),
-                '*' => self.block_comment(),
-                _ => Slash,
-            },
-
-            // Whitespace sequence.
-            c if is_whitespace(c) => self.whitespace(),
-
-            // Raw identifier, raw string literal or identifier.
-            'r' => match (self.first(), self.second()) {
-                ('#', c1) if is_id_start(c1) => self.raw_ident(),
-                ('#', _) | ('"', _) => {
-                    let (n_hashes, err) = self.raw_double_quoted_string(1);
-                    let suffix_start = self.len_consumed();
-                    if err.is_none() {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = RawStr { n_hashes, err };
-                    Literal { kind, suffix_start }
-                }
-                _ => self.ident(),
-            },
-
-            // Byte literal, byte string literal, raw byte string literal or identifier.
-            'b' => match (self.first(), self.second()) {
-                ('\'', _) => {
-                    self.bump();
-                    let terminated = self.single_quoted_string();
-                    let suffix_start = self.len_consumed();
-                    if terminated {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = Byte { terminated };
-                    Literal { kind, suffix_start }
-                }
-                ('"', _) => {
-                    self.bump();
-                    let terminated = self.double_quoted_string();
-                    let suffix_start = self.len_consumed();
-                    if terminated {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = ByteStr { terminated };
-                    Literal { kind, suffix_start }
-                }
-                ('r', '"') | ('r', '#') => {
-                    self.bump();
-                    let (n_hashes, err) = self.raw_double_quoted_string(2);
-                    let suffix_start = self.len_consumed();
-                    if err.is_none() {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = RawByteStr { n_hashes, err };
-                    Literal { kind, suffix_start }
-                }
-                _ => self.ident(),
-            },
-
-            // Identifier (this should be checked after other variant that can
-            // start as identifier).
-            c if is_id_start(c) => self.ident(),
-
-            // Numeric literal.
-            c @ '0'..='9' => {
-                let literal_kind = self.number(c);
-                let suffix_start = self.len_consumed();
-                self.eat_literal_suffix();
-                TokenKind::Literal { kind: literal_kind, suffix_start }
-            }
+/// Parses the first token from the provided input string.
+pub fn first_token(input: &str) -> Token {
+    debug_assert!(!input.is_empty());
+    let cursor = &mut Cursor::new(input);
+
+    let first_char = cursor.bump().unwrap();
+    let token_kind = match first_char {
+        // Slash, comment or block comment.
+        '/' => match cursor.peek() {
+            '/' => line_comment(cursor),
+            '*' => block_comment(cursor),
+            _ => Slash,
+        },
+
+        // Whitespace sequence.
+        c if is_whitespace(c) => {
+            cursor.bump_while(is_whitespace);
+            Whitespace
+        }
 
-            // One-symbol tokens.
-            ';' => Semi,
-            ',' => Comma,
-            '.' => Dot,
-            '(' => OpenParen,
-            ')' => CloseParen,
-            '{' => OpenBrace,
-            '}' => CloseBrace,
-            '[' => OpenBracket,
-            ']' => CloseBracket,
-            '@' => At,
-            '#' => Pound,
-            '~' => Tilde,
-            '?' => Question,
-            ':' => Colon,
-            '$' => Dollar,
-            '=' => Eq,
-            '!' => Bang,
-            '<' => Lt,
-            '>' => Gt,
-            '-' => Minus,
-            '&' => And,
-            '|' => Or,
-            '+' => Plus,
-            '*' => Star,
-            '^' => Caret,
-            '%' => Percent,
-
-            // Lifetime or character literal.
-            '\'' => self.lifetime_or_char(),
-
-            // String literal.
-            '"' => {
-                let terminated = self.double_quoted_string();
-                let suffix_start = self.len_consumed();
-                if terminated {
-                    self.eat_literal_suffix();
+        // Raw identifier, raw string literal or identifier.
+        'r' => match (cursor.peek(), cursor.peek_second()) {
+            ('#', c1) if is_id_start(c1) => {
+                // Eat "#" symbol.
+                cursor.bump();
+                // Eat the identifier start; `ident` below eats the rest of RawIdent.
+                cursor.bump();
+                ident(cursor);
+                RawIdent
+            }
+            ('#', _) | ('"', _) => {
+                let (n_hashes, err) = raw_double_quoted_string(cursor, 1);
+                let suffix_start = cursor.len_consumed();
+                if err.is_none() {
+                    eat_literal_suffix(cursor);
                 }
-                let kind = Str { terminated };
+                let kind = LiteralKind::RawStr { n_hashes, err };
                 Literal { kind, suffix_start }
             }
-            _ => Unknown,
-        };
-        Token::new(token_kind, self.len_consumed())
-    }
-
-    fn line_comment(&mut self) -> TokenKind {
-        debug_assert!(self.prev() == '/' && self.first() == '/');
-        self.bump();
-
-        let doc_style = match self.first() {
-            // `//!` is an inner line doc comment.
-            '!' => Some(DocStyle::Inner),
-            // `////` (more than 3 slashes) is not considered a doc comment.
-            '/' if self.second() != '/' => Some(DocStyle::Outer),
-            _ => None,
-        };
-
-        self.eat_while(|c| c != '\n');
-        LineComment { doc_style }
-    }
-
-    fn block_comment(&mut self) -> TokenKind {
-        debug_assert!(self.prev() == '/' && self.first() == '*');
-        self.bump();
-
-        let doc_style = match self.first() {
-            // `/*!` is an inner block doc comment.
-            '!' => Some(DocStyle::Inner),
-            // `/***` (more than 2 stars) is not considered a doc comment.
-            // `/**/` is not considered a doc comment.
-            '*' if !matches!(self.second(), '*' | '/') => Some(DocStyle::Outer),
-            _ => None,
-        };
-
-        let mut depth = 1usize;
-        while let Some(c) = self.bump() {
-            match c {
-                '/' if self.first() == '*' => {
-                    self.bump();
-                    depth += 1;
-                }
-                '*' if self.first() == '/' => {
-                    self.bump();
-                    depth -= 1;
-                    if depth == 0 {
-                        // This block comment is closed, so for a construction like "/* */ */"
-                        // there will be a successfully parsed block comment "/* */"
-                        // and " */" will be processed separately.
-                        break;
-                    }
+            _ => ident(cursor),
+        },
+
+        // Byte literal, byte string literal, raw byte string literal or identifier.
+        'b' => match (cursor.peek(), cursor.peek_second()) {
+            ('\'', _) => {
+                cursor.bump();
+                let terminated = single_quoted_string(cursor);
+                let suffix_start = cursor.len_consumed();
+                if terminated {
+                    eat_literal_suffix(cursor);
                 }
-                _ => (),
+                let kind = LiteralKind::Byte { terminated };
+                Literal { kind, suffix_start }
             }
-        }
-
-        BlockComment { doc_style, terminated: depth == 0 }
-    }
-
-    fn whitespace(&mut self) -> TokenKind {
-        debug_assert!(is_whitespace(self.prev()));
-        self.eat_while(is_whitespace);
-        Whitespace
-    }
-
-    fn raw_ident(&mut self) -> TokenKind {
-        debug_assert!(self.prev() == 'r' && self.first() == '#' && is_id_start(self.second()));
-        // Eat "#" symbol.
-        self.bump();
-        // Eat the identifier part of RawIdent.
-        self.eat_identifier();
-        RawIdent
-    }
-
-    fn ident(&mut self) -> TokenKind {
-        debug_assert!(is_id_start(self.prev()));
-        // Start is already eaten, eat the rest of identifier.
-        self.eat_while(is_id_continue);
-        Ident
-    }
-
-    fn number(&mut self, first_digit: char) -> LiteralKind {
-        debug_assert!('0' <= self.prev() && self.prev() <= '9');
-        let mut base = Base::Decimal;
-        if first_digit == '0' {
-            // Attempt to parse encoding base.
-            let has_digits = match self.first() {
-                'b' => {
-                    base = Base::Binary;
-                    self.bump();
-                    self.eat_decimal_digits()
-                }
-                'o' => {
-                    base = Base::Octal;
-                    self.bump();
-                    self.eat_decimal_digits()
-                }
-                'x' => {
-                    base = Base::Hexadecimal;
-                    self.bump();
-                    self.eat_hexadecimal_digits()
-                }
-                // Not a base prefix.
-                '0'..='9' | '_' | '.' | 'e' | 'E' => {
-                    self.eat_decimal_digits();
-                    true
+            ('"', _) => {
+                cursor.bump();
+                let terminated = double_quoted_string(cursor);
+                let suffix_start = cursor.len_consumed();
+                if terminated {
+                    eat_literal_suffix(cursor);
                 }
-                // Just a 0.
-                _ => return Int { base, empty_int: false },
-            };
-            // Base prefix was provided, but there were no digits
-            // after it, e.g. "0x".
-            if !has_digits {
-                return Int { base, empty_int: true };
+                let kind = LiteralKind::ByteStr { terminated };
+                Literal { kind, suffix_start }
             }
-        } else {
-            // No base prefix, parse number in the usual way.
-            self.eat_decimal_digits();
-        };
-
-        match self.first() {
-            // Don't be greedy if this is actually an
-            // integer literal followed by field/method access or a range pattern
-            // (`0..2` and `12.foo()`)
-            '.' if self.second() != '.' && !is_id_start(self.second()) => {
-                // might have stuff after the ., and if it does, it needs to start
-                // with a number
-                self.bump();
-                let mut empty_exponent = false;
-                if self.first().is_digit(10) {
-                    self.eat_decimal_digits();
-                    match self.first() {
-                        'e' | 'E' => {
-                            self.bump();
-                            empty_exponent = !self.eat_float_exponent();
-                        }
-                        _ => (),
-                    }
+            ('r', '"') | ('r', '#') => {
+                cursor.bump();
+                let (n_hashes, err) = raw_double_quoted_string(cursor, 2);
+                let suffix_start = cursor.len_consumed();
+                if err.is_none() {
+                    eat_literal_suffix(cursor);
                 }
-                Float { base, empty_exponent }
-            }
-            'e' | 'E' => {
-                self.bump();
-                let empty_exponent = !self.eat_float_exponent();
-                Float { base, empty_exponent }
+                let kind = LiteralKind::RawByteStr { n_hashes, err };
+                Literal { kind, suffix_start }
             }
-            _ => Int { base, empty_int: false },
+            _ => ident(cursor),
+        },
+
+        // Identifier (this should be checked after other variant that can
+        // start as identifier).
+        c if is_id_start(c) => ident(cursor),
+
+        // Numeric literal.
+        c @ '0'..='9' => {
+            let literal_kind = number(cursor, c);
+            let suffix_start = cursor.len_consumed();
+            eat_literal_suffix(cursor);
+            TokenKind::Literal { kind: literal_kind, suffix_start }
         }
-    }
 
-    fn lifetime_or_char(&mut self) -> TokenKind {
-        debug_assert!(self.prev() == '\'');
-
-        let can_be_a_lifetime = if self.second() == '\'' {
-            // It's surely not a lifetime.
-            false
-        } else {
-            // If the first symbol is valid for identifier, it can be a lifetime.
-            // Also check if it's a number for a better error reporting (so '0 will
-            // be reported as invalid lifetime and not as unterminated char literal).
-            is_id_start(self.first()) || self.first().is_digit(10)
-        };
-
-        if !can_be_a_lifetime {
-            let terminated = self.single_quoted_string();
-            let suffix_start = self.len_consumed();
+        // One-symbol tokens.
+        ';' => Semi,
+        ',' => Comma,
+        '.' => Dot,
+        '(' => OpenParen,
+        ')' => CloseParen,
+        '{' => OpenBrace,
+        '}' => CloseBrace,
+        '[' => OpenBracket,
+        ']' => CloseBracket,
+        '@' => At,
+        '#' => Pound,
+        '~' => Tilde,
+        '?' => Question,
+        ':' => Colon,
+        '$' => Dollar,
+        '=' => Eq,
+        '!' => Bang,
+        '<' => Lt,
+        '>' => Gt,
+        '-' => Minus,
+        '&' => And,
+        '|' => Or,
+        '+' => Plus,
+        '*' => Star,
+        '^' => Caret,
+        '%' => Percent,
+
+        // Lifetime or character literal.
+        '\'' => lifetime_or_char(cursor),
+
+        // String literal.
+        '"' => {
+            let terminated = double_quoted_string(cursor);
+            let suffix_start = cursor.len_consumed();
             if terminated {
-                self.eat_literal_suffix();
-            }
-            let kind = Char { terminated };
-            return Literal { kind, suffix_start };
-        }
-
-        // Either a lifetime or a character literal with
-        // length greater than 1.
-
-        let starts_with_number = self.first().is_digit(10);
-
-        // Skip the literal contents.
-        // First symbol can be a number (which isn't a valid identifier start),
-        // so skip it without any checks.
-        self.bump();
-        self.eat_while(is_id_continue);
-
-        // Check if after skipping literal contents we've met a closing
-        // single quote (which means that user attempted to create a
-        // string with single quotes).
-        if self.first() == '\'' {
-            self.bump();
-            let kind = Char { terminated: true };
-            Literal { kind, suffix_start: self.len_consumed() }
-        } else {
-            Lifetime { starts_with_number }
-        }
-    }
-
-    fn single_quoted_string(&mut self) -> bool {
-        debug_assert!(self.prev() == '\'');
-        // Check if it's a one-symbol literal.
-        if self.second() == '\'' && self.first() != '\\' {
-            self.bump();
-            self.bump();
-            return true;
-        }
-
-        // Literal has more than one symbol.
-
-        // Parse until either quotes are terminated or error is detected.
-        loop {
-            match self.first() {
-                // Quotes are terminated, finish parsing.
-                '\'' => {
-                    self.bump();
-                    return true;
-                }
-                // Probably beginning of the comment, which we don't want to include
-                // to the error report.
-                '/' => break,
-                // Newline without following '\'' means unclosed quote, stop parsing.
-                '\n' if self.second() != '\'' => break,
-                // End of file, stop parsing.
-                EOF_CHAR if self.is_eof() => break,
-                // Escaped slash is considered one character, so bump twice.
-                '\\' => {
-                    self.bump();
-                    self.bump();
-                }
-                // Skip the character.
-                _ => {
-                    self.bump();
-                }
-            }
-        }
-        // String was not terminated.
-        false
-    }
-
-    /// Eats double-quoted string and returns true
-    /// if string is terminated.
-    fn double_quoted_string(&mut self) -> bool {
-        debug_assert!(self.prev() == '"');
-        while let Some(c) = self.bump() {
-            match c {
-                '"' => {
-                    return true;
-                }
-                '\\' if self.first() == '\\' || self.first() == '"' => {
-                    // Bump again to skip escaped character.
-                    self.bump();
-                }
-                _ => (),
-            }
-        }
-        // End of file reached.
-        false
-    }
-
-    /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
-    fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u16, Option<RawStrError>) {
-        // Wrap the actual function to handle the error with too many hashes.
-        // This way, it eats the whole raw string.
-        let (n_hashes, err) = self.raw_string_unvalidated(prefix_len);
-        // Only up to 65535 `#`s are allowed in raw strings
-        match u16::try_from(n_hashes) {
-            Ok(num) => (num, err),
-            // We lie about the number of hashes here :P
-            Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
-        }
-    }
-
-    fn raw_string_unvalidated(&mut self, prefix_len: usize) -> (usize, Option<RawStrError>) {
-        debug_assert!(self.prev() == 'r');
-        let start_pos = self.len_consumed();
-        let mut possible_terminator_offset = None;
-        let mut max_hashes = 0;
-
-        // Count opening '#' symbols.
-        let mut eaten = 0;
-        while self.first() == '#' {
-            eaten += 1;
-            self.bump();
-        }
-        let n_start_hashes = eaten;
-
-        // Check that string is started.
-        match self.bump() {
-            Some('"') => (),
-            c => {
-                let c = c.unwrap_or(EOF_CHAR);
-                return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
+                eat_literal_suffix(cursor);
             }
+            let kind = LiteralKind::Str { terminated };
+            Literal { kind, suffix_start }
         }
+        _ => Unknown,
+    };
+    Token::new(token_kind, cursor.len_consumed())
+}
 
-        // Skip the string contents and on each '#' character met, check if this is
-        // a raw string termination.
-        loop {
-            self.eat_while(|c| c != '"');
-
-            if self.is_eof() {
-                return (
-                    n_start_hashes,
-                    Some(RawStrError::NoTerminator {
-                        expected: n_start_hashes,
-                        found: max_hashes,
-                        possible_terminator_offset,
-                    }),
-                );
-            }
+fn line_comment(cursor: &mut Cursor<'_>) -> TokenKind {
+    debug_assert!(cursor.prev() == '/' && cursor.peek() == '/');
+    cursor.bump();
 
-            // Eat closing double quote.
-            self.bump();
-
-            // Check that amount of closing '#' symbols
-            // is equal to the amount of opening ones.
-            // Note that this will not consume extra trailing `#` characters:
-            // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
-            // followed by a `#` token.
-            let mut n_end_hashes = 0;
-            while self.first() == '#' && n_end_hashes < n_start_hashes {
-                n_end_hashes += 1;
-                self.bump();
-            }
+    let doc_style = match cursor.peek() {
+        // `//!` is an inner line doc comment.
+        '!' => Some(DocStyle::Inner),
+        // `////` (more than 3 slashes) is not considered a doc comment.
+        '/' if cursor.peek_second() != '/' => Some(DocStyle::Outer),
+        _ => None,
+    };
 
-            if n_end_hashes == n_start_hashes {
-                return (n_start_hashes, None);
-            } else if n_end_hashes > max_hashes {
-                // Keep track of possible terminators to give a hint about
-                // where there might be a missing terminator
-                possible_terminator_offset =
-                    Some(self.len_consumed() - start_pos - n_end_hashes + prefix_len);
-                max_hashes = n_end_hashes;
-            }
-        }
-    }
+    cursor.bump_while(|c| c != '\n');
+    LineComment { doc_style }
+}
 
-    fn eat_decimal_digits(&mut self) -> bool {
-        let mut has_digits = false;
-        loop {
-            match self.first() {
-                '_' => {
-                    self.bump();
-                }
-                '0'..='9' => {
-                    has_digits = true;
-                    self.bump();
-                }
-                _ => break,
+fn block_comment(cursor: &mut Cursor<'_>) -> TokenKind {
+    debug_assert!(cursor.prev() == '/' && cursor.peek() == '*');
+    cursor.bump();
+
+    let doc_style = match cursor.peek() {
+        // `/*!` is an inner block doc comment.
+        '!' => Some(DocStyle::Inner),
+        // `/***` (more than 2 stars) is not considered a doc comment.
+        // `/**/` is not considered a doc comment.
+        '*' if !matches!(cursor.peek_second(), '*' | '/') => Some(DocStyle::Outer),
+        _ => None,
+    };
+
+    let mut depth = 1usize;
+    while let Some(c) = cursor.bump() {
+        match c {
+            '/' if cursor.peek() == '*' => {
+                cursor.bump();
+                depth += 1;
             }
-        }
-        has_digits
-    }
-
-    fn eat_hexadecimal_digits(&mut self) -> bool {
-        let mut has_digits = false;
-        loop {
-            match self.first() {
-                '_' => {
-                    self.bump();
-                }
-                '0'..='9' | 'a'..='f' | 'A'..='F' => {
-                    has_digits = true;
-                    self.bump();
+            '*' if cursor.peek() == '/' => {
+                cursor.bump();
+                depth -= 1;
+                if depth == 0 {
+                    // This block comment is closed, so for a construction like "/* */ */"
+                    // there will be a successfully parsed block comment "/* */"
+                    // and " */" will be processed separately.
+                    break;
                 }
-                _ => break,
             }
+            _ => (),
         }
-        has_digits
-    }
-
-    /// Eats the float exponent. Returns true if at least one digit was met,
-    /// and returns false otherwise.
-    fn eat_float_exponent(&mut self) -> bool {
-        debug_assert!(self.prev() == 'e' || self.prev() == 'E');
-        if self.first() == '-' || self.first() == '+' {
-            self.bump();
-        }
-        self.eat_decimal_digits()
-    }
-
-    // Eats the suffix of the literal, e.g. "_u8".
-    fn eat_literal_suffix(&mut self) {
-        self.eat_identifier();
     }
 
-    // Eats the identifier.
-    fn eat_identifier(&mut self) {
-        if !is_id_start(self.first()) {
-            return;
-        }
-        self.bump();
-
-        self.eat_while(is_id_continue);
-    }
+    BlockComment { doc_style, terminated: depth == 0 }
+}
 
-    /// Eats symbols while predicate returns true or until the end of file is reached.
-    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
-        while predicate(self.first()) && !self.is_eof() {
-            self.bump();
-        }
-    }
+/// Eats the rest of an identifier whose start char is already eaten; returns `Ident`.
+pub(crate) fn ident(cursor: &mut Cursor<'_>) -> TokenKind {
+    debug_assert!(is_id_start(cursor.prev()));
+    cursor.bump_while(is_id_continue);
+    Ident
 }
diff --git a/compiler/rustc_lexer/src/literals.rs b/compiler/rustc_lexer/src/literals.rs
new file mode 100644
index 0000000000000..33792622bec57
--- /dev/null
+++ b/compiler/rustc_lexer/src/literals.rs
@@ -0,0 +1,366 @@
+use crate::cursor::{Cursor, EOF_CHAR};
+use crate::{ident, is_id_continue, is_id_start, TokenKind};
+use std::convert::TryFrom;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum LiteralKind {
+    /// "12_u8", "0o100", "0b120i99"
+    Int { base: Base, empty_int: bool },
+    /// "12.34f32", "0b100.100"
+    Float { base: Base, empty_exponent: bool },
+    /// "'a'", "'\\'", "'''", "';"
+    Char { terminated: bool },
+    /// "b'a'", "b'\\'", "b'''", "b';"
+    Byte { terminated: bool },
+    /// ""abc"", ""abc"
+    Str { terminated: bool },
+    /// "b"abc"", "b"abc"
+    ByteStr { terminated: bool },
+    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
+    RawStr { n_hashes: u16, err: Option<RawStrError> },
+    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
+    RawByteStr { n_hashes: u16, err: Option<RawStrError> },
+}
+
+/// Base of numeric literal encoding according to its prefix.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Base {
+    /// Literal starts with "0b".
+    Binary,
+    /// Literal starts with "0o".
+    Octal,
+    /// Literal starts with "0x".
+    Hexadecimal,
+    /// Literal doesn't contain a prefix.
+    Decimal,
+}
+
+/// Error produced validating a raw string. Represents cases like:
+/// - `r##~"abcde"##`: `InvalidStarter`
+/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
+/// - Too many `#`s (>65535): `TooManyDelimiters`
+// perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum RawStrError {
+    /// Non-`#` characters exist between `r` and `"`, e.g. `r#~"..`
+    InvalidStarter { bad_char: char },
+    /// The string was never terminated. `possible_terminator_offset` is the number of characters
+    /// after `r` or `br` at which the author may have intended to terminate it.
+    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
+    /// More than 65535 `#`s exist.
+    TooManyDelimiters { found: usize },
+}
+
+pub(crate) fn number(cursor: &mut Cursor<'_>, first_digit: char) -> LiteralKind {
+    debug_assert!('0' <= cursor.prev() && cursor.prev() <= '9');
+    let mut base = Base::Decimal;
+    if first_digit == '0' {
+        // Attempt to parse encoding base.
+        let has_digits = match cursor.peek() {
+            'b' => {
+                base = Base::Binary;
+                cursor.bump();
+                eat_decimal_digits(cursor)
+            }
+            'o' => {
+                base = Base::Octal;
+                cursor.bump();
+                eat_decimal_digits(cursor)
+            }
+            'x' => {
+                base = Base::Hexadecimal;
+                cursor.bump();
+                eat_hexadecimal_digits(cursor)
+            }
+            // Not a base prefix.
+            '0'..='9' | '_' | '.' | 'e' | 'E' => {
+                eat_decimal_digits(cursor);
+                true
+            }
+            // Just a 0.
+            _ => return LiteralKind::Int { base, empty_int: false },
+        };
+        // Base prefix was provided, but there were no digits
+        // after it, e.g. "0x".
+        if !has_digits {
+            return LiteralKind::Int { base, empty_int: true };
+        }
+    } else {
+        // No base prefix, parse number in the usual way.
+        eat_decimal_digits(cursor);
+    };
+
+    match cursor.peek() {
+        // Don't be greedy if this is actually an
+        // integer literal followed by field/method access or a range pattern
+        // (`0..2` and `12.foo()`)
+        '.' if cursor.peek_second() != '.' && !is_id_start(cursor.peek_second()) => {
+            // There may be more after the `.`; it only continues the literal
+            // if it starts with a decimal digit.
+            cursor.bump();
+            let mut empty_exponent = false;
+            if cursor.peek().is_digit(10) {
+                eat_decimal_digits(cursor);
+                match cursor.peek() {
+                    'e' | 'E' => {
+                        cursor.bump();
+                        empty_exponent = !eat_float_exponent(cursor);
+                    }
+                    _ => (),
+                }
+            }
+            LiteralKind::Float { base, empty_exponent }
+        }
+        'e' | 'E' => {
+            cursor.bump();
+            let empty_exponent = !eat_float_exponent(cursor);
+            LiteralKind::Float { base, empty_exponent }
+        }
+        _ => LiteralKind::Int { base, empty_int: false },
+    }
+}
+
+pub(crate) fn eat_decimal_digits(cursor: &mut Cursor<'_>) -> bool {
+    let mut has_digits = false;
+    loop {
+        match cursor.peek() {
+            '_' => {
+                cursor.bump();
+            }
+            '0'..='9' => {
+                has_digits = true;
+                cursor.bump();
+            }
+            _ => break,
+        }
+    }
+    has_digits
+}
+
+pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor<'_>) -> bool {
+    let mut has_digits = false;
+    loop {
+        match cursor.peek() {
+            '_' => {
+                cursor.bump();
+            }
+            '0'..='9' | 'a'..='f' | 'A'..='F' => {
+                has_digits = true;
+                cursor.bump();
+            }
+            _ => break,
+        }
+    }
+    has_digits
+}
+
+/// Eats the float exponent. Returns true if at least one digit was met,
+/// and returns false otherwise.
+fn eat_float_exponent(cursor: &mut Cursor<'_>) -> bool {
+    debug_assert!(cursor.prev() == 'e' || cursor.prev() == 'E');
+    if cursor.peek() == '-' || cursor.peek() == '+' {
+        cursor.bump();
+    }
+    eat_decimal_digits(cursor)
+}
+
+pub(crate) fn lifetime_or_char(cursor: &mut Cursor<'_>) -> TokenKind {
+    debug_assert!(cursor.prev() == '\'');
+
+    let can_be_a_lifetime = if cursor.peek_second() == '\'' {
+        // It's surely not a lifetime.
+        false
+    } else {
+        // If the first symbol is a valid identifier start, it can be a lifetime.
+        // Also check if it's a digit for better error reporting (so '0 will be
+        // reported as an invalid lifetime and not as an unterminated char literal).
+        is_id_start(cursor.peek()) || cursor.peek().is_digit(10)
+    };
+
+    if !can_be_a_lifetime {
+        let terminated = single_quoted_string(cursor);
+        let suffix_start = cursor.len_consumed();
+        if terminated {
+            eat_literal_suffix(cursor);
+        }
+        let kind = LiteralKind::Char { terminated };
+        return TokenKind::Literal { kind, suffix_start };
+    }
+
+    // Either a lifetime or a character literal with
+    // length greater than 1.
+
+    let starts_with_number = cursor.peek().is_digit(10);
+
+    // Skip the literal contents.
+    // First symbol can be a number (which isn't a valid identifier start),
+    // so skip it without any checks.
+    cursor.bump();
+    cursor.bump_while(is_id_continue);
+
+    // Check if after skipping literal contents we've met a closing
+    // single quote (which means that user attempted to create a
+    // string with single quotes).
+    if cursor.peek() == '\'' {
+        cursor.bump();
+        let kind = LiteralKind::Char { terminated: true };
+        TokenKind::Literal { kind, suffix_start: cursor.len_consumed() }
+    } else {
+        TokenKind::Lifetime { starts_with_number }
+    }
+}
+
+pub(crate) fn single_quoted_string(cursor: &mut Cursor<'_>) -> bool {
+    debug_assert!(cursor.prev() == '\'');
+    // Check if it's a one-symbol literal.
+    if cursor.peek_second() == '\'' && cursor.peek() != '\\' {
+        cursor.bump();
+        cursor.bump();
+        return true;
+    }
+
+    // Literal has more than one symbol.
+
+    // Parse until either quotes are terminated or error is detected.
+    loop {
+        match cursor.peek() {
+            // Quotes are terminated, finish parsing.
+            '\'' => {
+                cursor.bump();
+                return true;
+            }
+            // Probably the beginning of a comment, which we don't want to include
+            // in the error report.
+            '/' => break,
+            // Newline without following '\'' means unclosed quote, stop parsing.
+            '\n' if cursor.peek_second() != '\'' => break,
+            // End of file, stop parsing.
+            EOF_CHAR if cursor.is_eof() => break,
+            // Backslash and the character it escapes are skipped together, so bump twice.
+            '\\' => {
+                cursor.bump();
+                cursor.bump();
+            }
+            // Skip the character.
+            _ => {
+                cursor.bump();
+            }
+        }
+    }
+    // String was not terminated.
+    false
+}
+
+/// Eats double-quoted string and returns true
+/// if string is terminated.
+pub(crate) fn double_quoted_string(cursor: &mut Cursor<'_>) -> bool {
+    debug_assert!(cursor.prev() == '"');
+    while let Some(c) = cursor.bump() {
+        match c {
+            '"' => {
+                return true;
+            }
+            '\\' if cursor.peek() == '\\' || cursor.peek() == '"' => {
+                // Bump again to skip escaped character.
+                cursor.bump();
+            }
+            _ => (),
+        }
+    }
+    // End of file reached.
+    false
+}
+
+/// Eats a raw double-quoted string; returns `n_hashes` and an error if one was encountered.
+pub(crate) fn raw_double_quoted_string(
+    cursor: &mut Cursor<'_>,
+    prefix_len: usize,
+) -> (u16, Option<RawStrError>) {
+    // Wrap the actual function to handle the error with too many hashes.
+    // This way, it eats the whole raw string.
+    let (n_hashes, err) = raw_string_unvalidated(cursor, prefix_len);
+
+    // Only up to 65535 `#`s are allowed in raw strings.
+    match u16::try_from(n_hashes) {
+        Ok(num) => (num, err),
+        // We lie about the number of hashes here :P
+        Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
+    }
+}
+
+fn raw_string_unvalidated(
+    cursor: &mut Cursor<'_>,
+    prefix_len: usize,
+) -> (usize, Option<RawStrError>) {
+    debug_assert!(cursor.prev() == 'r');
+    let start_pos = cursor.len_consumed();
+    let mut possible_terminator_offset = None;
+    let mut max_hashes = 0;
+
+    // Count opening '#' symbols.
+    let mut eaten = 0;
+    while cursor.peek() == '#' {
+        eaten += 1;
+        cursor.bump();
+    }
+    let n_start_hashes = eaten;
+
+    // Check that string is started.
+    match cursor.bump() {
+        Some('"') => (),
+        c => {
+            let c = c.unwrap_or(EOF_CHAR);
+            return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
+        }
+    }
+
+    // Skip the string contents and on each '"' character met, check if this is
+    // a raw string termination.
+    loop {
+        cursor.bump_while(|c| c != '"');
+
+        if cursor.is_eof() {
+            return (
+                n_start_hashes,
+                Some(RawStrError::NoTerminator {
+                    expected: n_start_hashes,
+                    found: max_hashes,
+                    possible_terminator_offset,
+                }),
+            );
+        }
+
+        // Eat closing double quote.
+        cursor.bump();
+
+        // Check that the number of closing '#' symbols
+        // is equal to the number of opening ones.
+        // Note that this will not consume extra trailing `#` characters:
+        // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
+        // followed by a `#` token.
+        let mut n_end_hashes = 0;
+        while cursor.peek() == '#' && n_end_hashes < n_start_hashes {
+            n_end_hashes += 1;
+            cursor.bump();
+        }
+
+        if n_end_hashes == n_start_hashes {
+            return (n_start_hashes, None);
+        } else if n_end_hashes > max_hashes {
+            // Keep track of possible terminators to give a hint about
+            // where there might be a missing terminator
+            possible_terminator_offset =
+                Some(cursor.len_consumed() - start_pos - n_end_hashes + prefix_len);
+            max_hashes = n_end_hashes;
+        }
+    }
+}
+
+/// Eats the suffix of a literal, e.g. "_u8".
+pub(crate) fn eat_literal_suffix(cursor: &mut Cursor<'_>) {
+    // Eats one identifier.
+    if is_id_start(cursor.peek()) {
+        cursor.bump();
+        ident(cursor);
+    }
+}
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index 94017b7b286e2..8f1a968526609 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -1,12 +1,13 @@
-use super::*;
-
+use crate::cursor::Cursor;
+use crate::literals::{raw_double_quoted_string, RawStrError};
+use crate::{strip_shebang, tokenize};
 use expect_test::{expect, Expect};
 
 fn check_raw_str(s: &str, expected_hashes: u16, expected_err: Option<RawStrError>) {
     let s = &format!("r{}", s);
-    let mut cursor = Cursor::new(s);
+    let cursor = &mut Cursor::new(s);
     cursor.bump();
-    let (n_hashes, err) = cursor.raw_double_quoted_string(0);
+    let (n_hashes, err) = raw_double_quoted_string(cursor, 0);
     assert_eq!(n_hashes, expected_hashes);
     assert_eq!(err, expected_err);
 }