@@ -221,6 +221,91 @@ impl<'a> TryFrom<&'a CStr> for &'a CStr8 {
221
221
}
222
222
}
223
223
224
/// Get a Latin-1 character from a UTF-8 byte slice at the given offset.
///
/// Returns a pair containing the Latin-1 character and the number of bytes in
/// the UTF-8 encoding of that character (always 1 or 2).
///
/// # Panics
///
/// Panics if the character starting at `offset` cannot be encoded in Latin-1,
/// i.e. its code point is greater than `0xff`. Slice indexing is checked, so
/// this also panics if `offset` is out of bounds for `bytes`.
///
/// # Safety
///
/// The input `bytes` must be valid UTF-8.
const unsafe fn latin1_from_utf8_at_offset(bytes: &[u8], offset: usize) -> (u8, usize) {
    let lead = bytes[offset];

    if lead & 0b1000_0000 == 0b0000_0000 {
        // Single-byte sequence (`0xxxxxxx`): the byte is already the Latin-1
        // value.
        (lead, 1)
    } else if lead & 0b1110_0000 == 0b1100_0000 {
        // Two-byte sequence (`110xxxxx 10xxxxxx`): combine the five payload
        // bits of the lead byte with the six payload bits of the next byte.
        // `as` is used because `From` conversions are not callable in a
        // `const fn`.
        let high = (lead & 0b0001_1111) as u16;
        let low = (bytes[offset + 1] & 0b0011_1111) as u16;
        let ch = (high << 6) | low;
        if ch > 0xff {
            panic!("input string cannot be encoded as Latin-1");
        }
        // Lossless: `ch` was just checked to fit in eight bits.
        (ch as u8, 2)
    } else {
        // Latin-1 code points only go up to 0xff, so if the input contains any
        // UTF-8 characters larger than two bytes it cannot be converted to
        // Latin-1.
        panic!("input string cannot be encoded as Latin-1");
    }
}
252
+
253
+ /// Count the number of Latin-1 characters in a string.
254
+ ///
255
+ /// Panics if the string cannot be encoded in Latin-1.
256
+ ///
257
+ /// This is public but hidden; it is used in the `cstr8` macro.
258
+ pub const fn str_num_latin1_chars ( s : & str ) -> usize {
259
+ let bytes = s. as_bytes ( ) ;
260
+ let len = bytes. len ( ) ;
261
+
262
+ let mut offset = 0 ;
263
+ let mut num_latin1_chars = 0 ;
264
+
265
+ while offset < len {
266
+ // SAFETY: `bytes` is valid UTF-8.
267
+ let ( _, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset ( bytes, offset) } ;
268
+ offset += num_utf8_bytes as usize ;
269
+ num_latin1_chars += 1 ;
270
+ }
271
+
272
+ num_latin1_chars
273
+ }
274
+
275
+ /// Convert a `str` into a null-terminated Latin-1 character array.
276
+ ///
277
+ /// Panics if the string cannot be encoded in Latin-1.
278
+ ///
279
+ /// This is public but hidden; it is used in the `cstr8` macro.
280
+ pub const fn str_to_latin1 < const N : usize > ( s : & str ) -> [ u8 ; N ] {
281
+ let bytes = s. as_bytes ( ) ;
282
+ let len = bytes. len ( ) ;
283
+
284
+ let mut output = [ 0 ; N ] ;
285
+
286
+ let mut output_offset = 0 ;
287
+ let mut input_offset = 0 ;
288
+ while input_offset < len {
289
+ // SAFETY: `bytes` is valid UTF-8.
290
+ let ( ch, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset ( bytes, input_offset) } ;
291
+ if ch == 0 {
292
+ panic ! ( "interior null character" ) ;
293
+ } else {
294
+ output[ output_offset] = ch;
295
+ output_offset += 1 ;
296
+ input_offset += num_utf8_bytes;
297
+ }
298
+ }
299
+
300
+ // The output array must be one bigger than the converted string,
301
+ // to leave room for the trailing null character.
302
+ if output_offset + 1 != N {
303
+ panic ! ( "incorrect array length" ) ;
304
+ }
305
+
306
+ output
307
+ }
308
+
224
309
/// A UCS-2 null-terminated string slice.
225
310
///
226
311
/// This type is largely inspired by [`core::ffi::CStr`] with the exception that all characters are
0 commit comments