Skip to content

Commit bdb290c

Browse files
uefi: Add new cstr8 implementation
Implement cstr8 as a declarative macro, similar to cstr16.
1 parent 02e06d1 commit bdb290c

File tree

4 files changed

+134
-15
lines changed

4 files changed

+134
-15
lines changed

uefi/src/data_types/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ pub use strs::{
151151
CStr16, CStr8, EqStrUntilNul, FromSliceWithNulError, FromStrWithBufError, UnalignedCStr16Error,
152152
};
153153

154+
/// These functions are used in the implementation of the [`cstr8`] macro.
155+
#[doc(hidden)]
156+
pub use strs::{str_num_latin1_chars, str_to_latin1};
157+
154158
#[cfg(feature = "alloc")]
155159
mod owned_strs;
156160
#[cfg(feature = "alloc")]

uefi/src/data_types/strs.rs

+85
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,91 @@ impl<'a> TryFrom<&'a CStr> for &'a CStr8 {
221221
}
222222
}
223223

224+
/// Decode one Latin-1 character from a UTF-8 byte slice at the given offset.
///
/// Returns a pair containing the Latin-1 character and the number of bytes
/// (1 or 2) that the character occupies in the UTF-8 input.
///
/// # Panics
///
/// Panics if the byte at `offset` does not start a one- or two-byte UTF-8
/// sequence, or if the decoded code point is greater than `0xff`; in both
/// cases the input cannot be encoded in Latin-1. Also panics if the bytes
/// being read lie outside of `bytes`.
///
/// # Safety
///
/// The input `bytes` must be valid UTF-8.
const unsafe fn latin1_from_utf8_at_offset(bytes: &[u8], offset: usize) -> (u8, usize) {
    let lead = bytes[offset];

    if lead & 0b1000_0000 == 0b0000_0000 {
        // Single-byte sequence (high bit clear): the byte is used as the
        // Latin-1 value unchanged.
        (lead, 1)
    } else if lead & 0b1110_0000 == 0b1100_0000 {
        // Two-byte sequence: five payload bits in the lead byte followed by
        // six payload bits in the continuation byte.
        let a = (lead & 0b0001_1111) as u16;
        let b = (bytes[offset + 1] & 0b0011_1111) as u16;
        let ch = (a << 6) | b;
        if ch > 0xff {
            panic!("input string cannot be encoded as Latin-1");
        }
        // The check above guarantees that this cast does not truncate.
        (ch as u8, 2)
    } else {
        // Latin-1 code points only go up to 0xff, so if the input contains any
        // UTF-8 characters larger than two bytes it cannot be converted to
        // Latin-1.
        panic!("input string cannot be encoded as Latin-1");
    }
}
252+
253+
/// Count the number of Latin-1 characters in a string.
254+
///
255+
/// Panics if the string cannot be encoded in Latin-1.
256+
///
257+
/// This is public but hidden; it is used in the `cstr8` macro.
258+
pub const fn str_num_latin1_chars(s: &str) -> usize {
259+
let bytes = s.as_bytes();
260+
let len = bytes.len();
261+
262+
let mut offset = 0;
263+
let mut num_latin1_chars = 0;
264+
265+
while offset < len {
266+
// SAFETY: `bytes` is valid UTF-8.
267+
let (_, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset(bytes, offset) };
268+
offset += num_utf8_bytes as usize;
269+
num_latin1_chars += 1;
270+
}
271+
272+
num_latin1_chars
273+
}
274+
275+
/// Convert a `str` into a null-terminated Latin-1 character array.
276+
///
277+
/// Panics if the string cannot be encoded in Latin-1.
278+
///
279+
/// This is public but hidden; it is used in the `cstr8` macro.
280+
pub const fn str_to_latin1<const N: usize>(s: &str) -> [u8; N] {
281+
let bytes = s.as_bytes();
282+
let len = bytes.len();
283+
284+
let mut output = [0; N];
285+
286+
let mut output_offset = 0;
287+
let mut input_offset = 0;
288+
while input_offset < len {
289+
// SAFETY: `bytes` is valid UTF-8.
290+
let (ch, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset(bytes, input_offset) };
291+
if ch == 0 {
292+
panic!("interior null character");
293+
} else {
294+
output[output_offset] = ch;
295+
output_offset += 1;
296+
input_offset += num_utf8_bytes;
297+
}
298+
}
299+
300+
// The output array must be one bigger than the converted string,
301+
// to leave room for the trailing null character.
302+
if output_offset + 1 != N {
303+
panic!("incorrect array length");
304+
}
305+
306+
output
307+
}
308+
224309
/// An UCS-2 null-terminated string slice.
225310
///
226311
/// This type is largely inspired by [`core::ffi::CStr`] with the exception that all characters are

uefi/src/lib.rs

+1-15
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ pub mod data_types;
113113
#[cfg(feature = "alloc")]
114114
pub use data_types::CString16;
115115
pub use data_types::{CStr16, CStr8, Char16, Char8, Event, Guid, Handle, Identify};
116-
pub use uefi_macros::{cstr8, entry};
116+
pub use uefi_macros::entry;
117117
pub use uguid::guid;
118118

119119
mod result;
@@ -140,17 +140,3 @@ pub mod helpers;
140140

141141
mod macros;
142142
mod util;
143-
144-
#[cfg(test)]
// A crate that defines procedural macros cannot unit test the macros it
// exports, so the `cstr8` macro is exercised from this crate instead.
mod macro_tests {
    use crate::cstr8;

    /// Checks that `cstr8!` accepts no argument, an empty literal, and a
    /// regular literal.
    #[test]
    fn cstr8_macro_literal() {
        let _no_argument = cstr8!();
        let _empty_literal = cstr8!("");
        let _plain_literal = cstr8!("foobar");
    }
}

uefi/src/macros.rs

+44
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,47 @@
1+
/// Build a [`&CStr8`] from a string literal.
///
/// The conversion to Latin-1 runs at compile time, which means the result is
/// usable in a `const` item and an invalid literal is rejected during
/// compilation.
///
/// Both `cstr8!()` and `cstr8!("")` produce an empty string that consists of
/// only the null character.
///
/// # Example
///
/// ```
/// use uefi::{CStr8, cstr8};
///
/// const S: &CStr8 = cstr8!("abÿ");
/// assert_eq!(S.as_bytes(), [97, 98, 255, 0]);
///
/// const EMPTY: &CStr8 = cstr8!();
/// assert_eq!(EMPTY.as_bytes(), [0]);
/// assert_eq!(cstr8!(""), EMPTY);
/// ```
///
/// [`&CStr8`]: crate::CStr8
#[macro_export]
macro_rules! cstr8 {
    () => {{
        const NUL_ONLY: &[u8] = &[0];
        // SAFETY: a lone null byte is a valid, null-terminated Latin-1 string.
        unsafe { $crate::CStr8::from_bytes_with_nul_unchecked(NUL_ONLY) }
    }};
    ($s:literal) => {{
        // Both values below are `const` items so that any conversion failure
        // is reported at compile time.

        // Character count of the literal, plus one for the trailing null.
        const LEN_WITH_NUL: usize = $crate::data_types::str_num_latin1_chars($s) + 1;

        const LATIN1: [u8; LEN_WITH_NUL] = $crate::data_types::str_to_latin1($s);

        // SAFETY: `str_to_latin1` always yields a valid Latin-1 string that
        // ends with a null character.
        unsafe { $crate::CStr8::from_bytes_with_nul_unchecked(&LATIN1) }
    }};
}
44+
145
/// Encode a string literal as a [`&CStr16`].
246
///
347
/// The encoding is done at compile time, so the result can be used in a

0 commit comments

Comments
 (0)