diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 0e2c140c367a9..df451ccdcb437 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -95,6 +95,7 @@ #![feature(const_ptr_write)] #![feature(const_raw_ptr_comparison)] #![feature(const_raw_ptr_deref)] +#![feature(const_raw_ptr_to_usize_cast)] #![feature(const_slice_from_raw_parts)] #![feature(const_slice_ptr_len)] #![feature(const_size_of_val)] @@ -103,9 +104,11 @@ #![feature(const_type_id)] #![feature(const_type_name)] #![feature(const_likely)] +#![feature(const_str_from_utf8_unchecked)] #![feature(const_unreachable_unchecked)] #![feature(const_maybe_uninit_assume_init)] #![feature(const_maybe_uninit_as_ptr)] +#![feature(str_internals)] #![feature(custom_inner_attributes)] #![feature(decl_macro)] #![feature(doc_cfg)] diff --git a/library/core/src/str/converts.rs b/library/core/src/str/converts.rs index 05ff7bb120dae..d282c823e84b8 100644 --- a/library/core/src/str/converts.rs +++ b/library/core/src/str/converts.rs @@ -82,10 +82,16 @@ use super::Utf8Error; /// assert_eq!("💖", sparkle_heart); /// ``` #[stable(feature = "rust1", since = "1.0.0")] -pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { - run_utf8_validation(v)?; - // SAFETY: Just ran validation. - Ok(unsafe { from_utf8_unchecked(v) }) +#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")] +#[rustc_allow_const_fn_unstable(str_internals)] +#[rustc_allow_const_fn_unstable(const_str_from_utf8_unchecked)] +pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { + // ? is not available in const + match run_utf8_validation(v) { + // SAFETY: Just ran validation. + Ok(_) => Ok(unsafe { from_utf8_unchecked(v) }), + Err(e) => Err(e), + } } /// Converts a mutable slice of bytes to a mutable string slice. @@ -119,10 +125,16 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { /// See the docs for [`Utf8Error`] for more details on the kinds of /// errors that can be returned. 
#[stable(feature = "str_mut_extras", since = "1.20.0")] -pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { - run_utf8_validation(v)?; - // SAFETY: Just ran validation. - Ok(unsafe { from_utf8_unchecked_mut(v) }) +#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")] +#[rustc_allow_const_fn_unstable(str_internals)] +#[rustc_allow_const_fn_unstable(const_str_from_utf8_unchecked)] +pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { + // ? is not available in const + match run_utf8_validation(v) { + // SAFETY: Just ran validation. + Ok(_) => Ok(unsafe { from_utf8_unchecked_mut(v) }), + Err(e) => Err(e), + } } /// Converts a slice of bytes to a string slice without checking @@ -183,7 +195,8 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { /// ``` #[inline] #[stable(feature = "str_mut_extras", since = "1.20.0")] -pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { +#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked", issue = "75196")] +pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { // SAFETY: the caller must guarantee that the bytes `v` // are valid UTF-8, thus the cast to `*mut str` is safe. // Also, the pointer dereference is safe because that pointer diff --git a/library/core/src/str/validations.rs b/library/core/src/str/validations.rs index 373a8212425ac..6626d0c69b105 100644 --- a/library/core/src/str/validations.rs +++ b/library/core/src/str/validations.rs @@ -105,21 +105,25 @@ const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize; /// Returns `true` if any byte in the word `x` is nonascii (>= 128). #[inline] -fn contains_nonascii(x: usize) -> bool { +const fn contains_nonascii(x: usize) -> bool { (x & NONASCII_MASK) != 0 } /// Walks through `v` checking that it's a valid UTF-8 sequence, /// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`. 
#[inline(always)] -pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { +#[rustc_const_unstable(feature = "str_internals", issue = "none")] +pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { let mut index = 0; let len = v.len(); let usize_bytes = mem::size_of::<usize>(); let ascii_block_size = 2 * usize_bytes; let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 }; - let align = v.as_ptr().align_offset(usize_bytes); + // FIXME(lf-) align_offset is not const fn yet, so we do it manually + let mask = usize_bytes - 1; + // SAFETY: FIXME(review): not yet justified; a pointer-to-usize cast may not be evaluable at compile time, so confirm this is sound before relying on it in const contexts + let align = (usize_bytes - (unsafe { v.as_ptr() as usize } & mask)) & mask; while index < len { let old_offset = index; @@ -230,7 +234,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { } // https://tools.ietf.org/html/rfc3629 -static UTF8_CHAR_WIDTH: [u8; 256] = [ +const UTF8_CHAR_WIDTH: [u8; 256] = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,