Skip to content

Commit 65aae97

Browse files
authored
Merge pull request #1151 from nicholasbishop/bishop-rework-str-macros
Replace `cstr8!` with a declarative macro
2 parents 02e06d1 + 7b1fa84 commit 65aae97

File tree

6 files changed

+139
-59
lines changed

6 files changed

+139
-59
lines changed

uefi-macros/CHANGELOG.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# uefi-macros - [Unreleased]
22

33
## Removed
4-
- Removed the `cstr16` macro. Use the `cstr16` declarative macro exported by the
5-
`uefi` crate instead.
4+
- Removed the `cstr8` and `cstr16` macros. Use the declarative macros of the
5+
same names exported by the `uefi` crate as a replacement.
66

77
# uefi-macros - 0.13.0 (2023-11-12)
88

uefi-macros/src/lib.rs

+1-42
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use quote::{quote, quote_spanned, TokenStreamExt};
99
use syn::spanned::Spanned;
1010
use syn::{
1111
parse_macro_input, parse_quote, Error, Expr, ExprLit, ExprPath, FnArg, Ident, ItemFn,
12-
ItemStruct, Lit, LitStr, Pat, Visibility,
12+
ItemStruct, Lit, Pat, Visibility,
1313
};
1414

1515
macro_rules! err {
@@ -247,44 +247,3 @@ pub fn entry(args: TokenStream, input: TokenStream) -> TokenStream {
247247
};
248248
result.into()
249249
}
250-
251-
/// Builds a `CStr8` literal at compile time from a string literal.
252-
///
253-
/// This will throw a compile error if an invalid character is in the passed string.
254-
///
255-
/// # Example
256-
/// ```
257-
/// # use uefi_macros::cstr8;
258-
/// // Empty string
259-
/// assert_eq!(cstr8!().to_u16_slice_with_nul(), [0]);
260-
/// assert_eq!(cstr8!("").to_u16_slice_with_nul(), [0]);
261-
/// // Non-empty string
262-
/// assert_eq!(cstr8!("test").as_bytes(), [116, 101, 115, 116, 0]);
263-
/// ```
264-
#[proc_macro]
265-
pub fn cstr8(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
266-
// Accept empty input.
267-
if input.is_empty() {
268-
return quote!(unsafe { ::uefi::CStr16::from_u16_with_nul_unchecked(&[0]) }).into();
269-
}
270-
let input: LitStr = parse_macro_input!(input);
271-
let input = input.value();
272-
// Accept "" input.
273-
if input.is_empty() {
274-
return quote!(unsafe { ::uefi::CStr16::from_u16_with_nul_unchecked(&[0]) }).into();
275-
}
276-
277-
// Accept any non-empty string input.
278-
match input
279-
.chars()
280-
.map(u8::try_from)
281-
.collect::<Result<Vec<u8>, _>>()
282-
{
283-
Ok(c) => {
284-
quote!(unsafe { ::uefi::CStr8::from_bytes_with_nul_unchecked(&[ #(#c),* , 0 ]) }).into()
285-
}
286-
Err(_) => syn::Error::new_spanned(input, "invalid character in string")
287-
.into_compile_error()
288-
.into(),
289-
}
290-
}

uefi/src/data_types/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ pub use strs::{
151151
CStr16, CStr8, EqStrUntilNul, FromSliceWithNulError, FromStrWithBufError, UnalignedCStr16Error,
152152
};
153153

154+
/// These functions are used in the implementation of the [`cstr8`] macro.
155+
#[doc(hidden)]
156+
pub use strs::{str_num_latin1_chars, str_to_latin1};
157+
154158
#[cfg(feature = "alloc")]
155159
mod owned_strs;
156160
#[cfg(feature = "alloc")]

uefi/src/data_types/strs.rs

+87
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,93 @@ impl<'a> TryFrom<&'a CStr> for &'a CStr8 {
221221
}
222222
}
223223

224+
/// Decode the character that starts at `offset` in a UTF-8 byte slice and
/// return it as a Latin-1 byte.
///
/// The result is a pair: the Latin-1 byte, followed by the number of input
/// bytes the character occupies in its UTF-8 encoding (1 or 2).
///
/// # Panics
///
/// Panics if the character cannot be encoded in Latin-1, i.e. its code point
/// is above `0xff`.
///
/// # Safety
///
/// The input `bytes` must be valid UTF-8.
const unsafe fn latin1_from_utf8_at_offset(bytes: &[u8], offset: usize) -> (u8, usize) {
    let lead = bytes[offset];

    // One-byte sequence (`0xxxxxxx`): the byte is the code point.
    if lead < 0b1000_0000 {
        return (lead, 1);
    }

    // Only two-byte sequences (`110xxxxx 10xxxxxx`) can still hold a code
    // point that fits in Latin-1; longer encodings always exceed 0xff.
    if lead >> 5 != 0b110 {
        panic!("input string cannot be encoded as Latin-1");
    }

    // Combine the five payload bits of the lead byte with the six payload
    // bits of the continuation byte.
    let high = (lead & 0b0001_1111) as u16;
    let low = (bytes[offset + 1] & 0b0011_1111) as u16;
    let code_point = (high << 6) | low;

    if code_point > 0xff {
        panic!("input string cannot be encoded as Latin-1");
    }
    (code_point as u8, 2)
}
252+
253+
/// Count the number of Latin-1 characters in a string.
254+
///
255+
/// Panics if the string cannot be encoded in Latin-1.
256+
///
257+
/// This is public but hidden; it is used in the `cstr8` macro.
258+
#[must_use]
259+
pub const fn str_num_latin1_chars(s: &str) -> usize {
260+
let bytes = s.as_bytes();
261+
let len = bytes.len();
262+
263+
let mut offset = 0;
264+
let mut num_latin1_chars = 0;
265+
266+
while offset < len {
267+
// SAFETY: `bytes` is valid UTF-8.
268+
let (_, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset(bytes, offset) };
269+
offset += num_utf8_bytes;
270+
num_latin1_chars += 1;
271+
}
272+
273+
num_latin1_chars
274+
}
275+
276+
/// Convert a `str` into a null-terminated Latin-1 character array.
277+
///
278+
/// Panics if the string cannot be encoded in Latin-1.
279+
///
280+
/// This is public but hidden; it is used in the `cstr8` macro.
281+
#[must_use]
282+
pub const fn str_to_latin1<const N: usize>(s: &str) -> [u8; N] {
283+
let bytes = s.as_bytes();
284+
let len = bytes.len();
285+
286+
let mut output = [0; N];
287+
288+
let mut output_offset = 0;
289+
let mut input_offset = 0;
290+
while input_offset < len {
291+
// SAFETY: `bytes` is valid UTF-8.
292+
let (ch, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset(bytes, input_offset) };
293+
if ch == 0 {
294+
panic!("interior null character");
295+
} else {
296+
output[output_offset] = ch;
297+
output_offset += 1;
298+
input_offset += num_utf8_bytes;
299+
}
300+
}
301+
302+
// The output array must be one bigger than the converted string,
303+
// to leave room for the trailing null character.
304+
if output_offset + 1 != N {
305+
panic!("incorrect array length");
306+
}
307+
308+
output
309+
}
310+
224311
/// A UCS-2 null-terminated string slice.
225312
///
226313
/// This type is largely inspired by [`core::ffi::CStr`] with the exception that all characters are

uefi/src/lib.rs

+1-15
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ pub mod data_types;
113113
#[cfg(feature = "alloc")]
114114
pub use data_types::CString16;
115115
pub use data_types::{CStr16, CStr8, Char16, Char8, Event, Guid, Handle, Identify};
116-
pub use uefi_macros::{cstr8, entry};
116+
pub use uefi_macros::entry;
117117
pub use uguid::guid;
118118

119119
mod result;
@@ -140,17 +140,3 @@ pub mod helpers;
140140

141141
mod macros;
142142
mod util;
143-
144-
#[cfg(test)]
// A crate that defines procedural macros cannot unit test the macros it
// exports, so they are exercised from this crate instead.
mod macro_tests {
    use crate::cstr8;

    /// `cstr8!` must accept no argument, an empty literal, and a non-empty
    /// literal.
    #[test]
    fn cstr8_macro_literal() {
        let _no_argument = cstr8!();
        let _empty_literal = cstr8!("");
        let _non_empty = cstr8!("foobar");
    }
}

uefi/src/macros.rs

+44
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,47 @@
1+
/// Build a [`&CStr8`] from a string literal.
///
/// The conversion runs at compile time, so the macro can be used to
/// initialize a `const` item.
///
/// Both `cstr8!()` and `cstr8!("")` produce an empty string, i.e. one that
/// contains only the null character.
///
/// # Example
///
/// ```
/// use uefi::{CStr8, cstr8};
///
/// const S: &CStr8 = cstr8!("abÿ");
/// assert_eq!(S.as_bytes(), [97, 98, 255, 0]);
///
/// const EMPTY: &CStr8 = cstr8!();
/// assert_eq!(EMPTY.as_bytes(), [0]);
/// assert_eq!(cstr8!(""), EMPTY);
/// ```
///
/// [`&CStr8`]: crate::CStr8
#[macro_export]
macro_rules! cstr8 {
    () => {{
        const NUL_ONLY: &[u8] = &[0];
        // SAFETY: a lone null byte is a valid, null-terminated Latin-1 string.
        unsafe { $crate::CStr8::from_bytes_with_nul_unchecked(NUL_ONLY) }
    }};
    ($s:literal) => {{
        // Both intermediate values are `const` items so that any conversion
        // failure is reported at compile time.

        // Length of the converted string, plus one for the null character.
        const LEN_WITH_NUL: usize = $crate::data_types::str_num_latin1_chars($s) + 1;

        const LATIN1: [u8; LEN_WITH_NUL] = $crate::data_types::str_to_latin1($s);

        // SAFETY: `str_to_latin1` always returns a valid Latin-1 string that
        // ends with a null character.
        unsafe { $crate::CStr8::from_bytes_with_nul_unchecked(&LATIN1) }
    }};
}
44+
145
/// Encode a string literal as a [`&CStr16`].
246
///
347
/// The encoding is done at compile time, so the result can be used in a

0 commit comments

Comments
 (0)