Skip to content

Commit 7c7cd90

Browse files
committed
Separate the unescape functions for strings, byte strings, and C strings, using macro_rules! to avoid duplicating code.
Also experiments with NonZero: C strings cannot contain NUL bytes, and that invariant can be captured in the type system.
1 parent c4b38a5 commit 7c7cd90

File tree

10 files changed

+372
-394
lines changed

10 files changed

+372
-394
lines changed

compiler/rustc_ast/src/util/literal.rs

+8 -9
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
use std::{ascii, fmt, str};
44

55
use literal_escaper::{
6-
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
6+
MixedUnit, unescape_byte, unescape_byte_str, unescape_char, unescape_cstr, unescape_str,
77
};
88
use rustc_span::{Span, Symbol, kw, sym};
99
use tracing::debug;
@@ -87,11 +87,10 @@ impl LitKind {
8787
// Force-inlining here is aggressive but the closure is
8888
// called on every char in the string, so it can be hot in
8989
// programs with many long strings containing escapes.
90-
unescape_unicode(
90+
unescape_str(
9191
s,
92-
Mode::Str,
9392
&mut #[inline(always)]
94-
|_, c| match c {
93+
|_, res| match res {
9594
Ok(c) => buf.push(c),
9695
Err(err) => {
9796
assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -111,8 +110,8 @@ impl LitKind {
111110
token::ByteStr => {
112111
let s = symbol.as_str();
113112
let mut buf = Vec::with_capacity(s.len());
114-
unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
115-
Ok(c) => buf.push(byte_from_char(c)),
113+
unescape_byte_str(s, &mut |_, res| match res {
114+
Ok(b) => buf.push(b),
116115
Err(err) => {
117116
assert!(!err.is_fatal(), "failed to unescape string literal")
118117
}
@@ -128,11 +127,11 @@ impl LitKind {
128127
token::CStr => {
129128
let s = symbol.as_str();
130129
let mut buf = Vec::with_capacity(s.len());
131-
unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
130+
unescape_cstr(s, &mut |_span, c| match c {
132131
Ok(MixedUnit::Char(c)) => {
133-
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
132+
buf.extend_from_slice(c.get().encode_utf8(&mut [0; 4]).as_bytes())
134133
}
135-
Ok(MixedUnit::HighByte(b)) => buf.push(b),
134+
Ok(MixedUnit::HighByte(b)) => buf.push(b.get()),
136135
Err(err) => {
137136
assert!(!err.is_fatal(), "failed to unescape C string literal")
138137
}

compiler/rustc_parse/src/lexer/mod.rs

+2 -2
Original file line number | Diff line number | Diff line change
@@ -985,8 +985,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
985985
prefix_len: u32,
986986
postfix_len: u32,
987987
) -> (token::LitKind, Symbol) {
988-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
989-
literal_escaper::unescape_mixed(src, mode, &mut |span, result| {
988+
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, _mode, callback| {
989+
literal_escaper::unescape_cstr(src, &mut |span, result| {
990990
callback(span, result.map(drop))
991991
})
992992
})

library/core/src/num/niche_types.rs

+2
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ define_valid_range_type! {
131131
pub struct NonZeroI32Inner(i32 as u32 in 1..=0xffff_ffff);
132132
pub struct NonZeroI64Inner(i64 as u64 in 1..=0xffffffff_ffffffff);
133133
pub struct NonZeroI128Inner(i128 as u128 in 1..=0xffffffffffffffff_ffffffffffffffff);
134+
135+
pub struct NonZeroCharInner(char as u32 in 1..=0x10ffff);
134136
}
135137

136138
#[cfg(target_pointer_width = "16")]

library/core/src/num/nonzero.rs

+1
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ impl_zeroable_primitive!(
7979
NonZeroI64Inner(i64),
8080
NonZeroI128Inner(i128),
8181
NonZeroIsizeInner(isize),
82+
NonZeroCharInner(char),
8283
);
8384

8485
/// A value that is known not to equal zero.

0 commit comments

Comments
 (0)