Skip to content

Make str::from_utf8 const fn #84643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
#![feature(const_ptr_write)]
#![feature(const_raw_ptr_comparison)]
#![feature(const_raw_ptr_deref)]
#![feature(const_raw_ptr_to_usize_cast)]
#![feature(const_slice_from_raw_parts)]
#![feature(const_slice_ptr_len)]
#![feature(const_size_of_val)]
Expand All @@ -103,9 +104,11 @@
#![feature(const_type_id)]
#![feature(const_type_name)]
#![feature(const_likely)]
#![feature(const_str_from_utf8_unchecked)]
#![feature(const_unreachable_unchecked)]
#![feature(const_maybe_uninit_assume_init)]
#![feature(const_maybe_uninit_as_ptr)]
#![feature(str_internals)]
#![feature(custom_inner_attributes)]
#![feature(decl_macro)]
#![feature(doc_cfg)]
Expand Down
31 changes: 22 additions & 9 deletions library/core/src/str/converts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,16 @@ use super::Utf8Error;
/// assert_eq!("💖", sparkle_heart);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
run_utf8_validation(v)?;
// SAFETY: Just ran validation.
Ok(unsafe { from_utf8_unchecked(v) })
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
#[rustc_allow_const_fn_unstable(str_internals)]
#[rustc_allow_const_fn_unstable(const_str_from_utf8_unchecked)]
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
// ? is not available in const
match run_utf8_validation(v) {
// SAFETY: Just ran validation.
Ok(_) => Ok(unsafe { from_utf8_unchecked(v) }),
Err(e) => Err(e),
}
}

/// Converts a mutable slice of bytes to a mutable string slice.
Expand Down Expand Up @@ -119,10 +125,16 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
/// See the docs for [`Utf8Error`] for more details on the kinds of
/// errors that can be returned.
#[stable(feature = "str_mut_extras", since = "1.20.0")]
pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
run_utf8_validation(v)?;
// SAFETY: Just ran validation.
Ok(unsafe { from_utf8_unchecked_mut(v) })
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
#[rustc_allow_const_fn_unstable(str_internals)]
#[rustc_allow_const_fn_unstable(const_str_from_utf8_unchecked)]
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
// ? is not available in const
match run_utf8_validation(v) {
// SAFETY: Just ran validation.
Ok(_) => Ok(unsafe { from_utf8_unchecked_mut(v) }),
Err(e) => Err(e),
}
}

/// Converts a slice of bytes to a string slice without checking
Expand Down Expand Up @@ -183,7 +195,8 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
/// ```
#[inline]
#[stable(feature = "str_mut_extras", since = "1.20.0")]
pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked", issue = "75196")]
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
// SAFETY: the caller must guarantee that the bytes `v`
// are valid UTF-8, thus the cast to `*mut str` is safe.
// Also, the pointer dereference is safe because that pointer
Expand Down
12 changes: 8 additions & 4 deletions library/core/src/str/validations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,21 +105,25 @@ const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;

/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
#[inline]
fn contains_nonascii(x: usize) -> bool {
const fn contains_nonascii(x: usize) -> bool {
(x & NONASCII_MASK) != 0
}

/// Walks through `v` checking that it's a valid UTF-8 sequence,
/// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
#[inline(always)]
pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
#[rustc_const_unstable(feature = "str_internals", issue = "none")]
pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
let mut index = 0;
let len = v.len();

let usize_bytes = mem::size_of::<usize>();
let ascii_block_size = 2 * usize_bytes;
let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
let align = v.as_ptr().align_offset(usize_bytes);
// FIXME(lf-) align_offset is not const fn yet, so we do it manually
let mask = usize_bytes - 1;
// SAFETY: uh help pls
let align = (usize_bytes - (unsafe { v.as_ptr() as usize } & mask)) & mask;

while index < len {
let old_offset = index;
Expand Down Expand Up @@ -230,7 +234,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
}

// https://tools.ietf.org/html/rfc3629
static UTF8_CHAR_WIDTH: [u8; 256] = [
const UTF8_CHAR_WIDTH: [u8; 256] = [
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, // 0x1F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
Expand Down