From e58dc4a4900059e4128e6ca72fe68015064c6bdd Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 22 Dec 2021 13:13:58 -0600 Subject: [PATCH 1/2] Allow passing `String` to `Encoding::encode` In the common case when converting from UTF8 to UTF8, or the string is all ASCII, this avoids an extra heap allocation for the caller if they only have a `String` available. Previously, they would have to call `encoding.encode(&string).into_owned()` to avoid lifetime errors. --- src/lib.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bce2738a..97ab49a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3257,11 +3257,29 @@ impl Encoding { /// /// Available to Rust only and only with the `alloc` feature enabled (enabled /// by default). + /// + /// # Examples + /// + /// `encode` can be used with either `&str` or `String`. + /// ``` + /// let encoding = encoding_rs::UTF_8; + /// let (bytes, output_encoding, saw_unmappable_chars) = encoding.encode("some string"); + /// let (bytes, output_encoding, saw_unmappable_chars) = encoding.encode(String::from("some string")); + /// ``` #[cfg(feature = "alloc")] - pub fn encode<'a>(&'static self, string: &'a str) -> (Cow<'a, [u8]>, &'static Encoding, bool) { + pub fn encode<'a>( + &'static self, + string: impl Into<Cow<'a, str>>, + ) -> (Cow<'a, [u8]>, &'static Encoding, bool) { + let string = string.into(); + let to_cow_bytes = |string: Cow<'a, str>| match string { + Cow::Owned(string) => Cow::Owned(string.into_bytes()), + Cow::Borrowed(str) => Cow::Borrowed(str.as_bytes()), + }; + let output_encoding = self.output_encoding(); if output_encoding == UTF_8 { - return (Cow::Borrowed(string.as_bytes()), output_encoding, false); + return (to_cow_bytes(string), output_encoding, false); } debug_assert!(output_encoding.is_potentially_borrowable()); let bytes = string.as_bytes(); @@ -3271,7 +3289,7 @@ impl Encoding { ascii_valid_up_to(bytes) }; if valid_up_to == bytes.len() { - return 
(Cow::Borrowed(bytes), output_encoding, false); + return (to_cow_bytes(string), output_encoding, false); } let mut encoder = output_encoding.new_encoder(); let mut vec: Vec<u8> = Vec::with_capacity( From 5ea8b3ddde57dc25e970065e4da129396727b3fd Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 22 Dec 2021 13:19:56 -0600 Subject: [PATCH 2/2] Make most of `encode()` non-generic to avoid heavy monomorphization costs --- src/lib.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 97ab49a7..1ef74b37 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3271,7 +3271,14 @@ impl Encoding { &'static self, string: impl Into<Cow<'a, str>>, ) -> (Cow<'a, [u8]>, &'static Encoding, bool) { - let string = string.into(); + self.encode_(string.into()) + } + + /// Non-generic version of `encode`, to avoid monomorphizing a large amount of code many times. + fn encode_<'a>( + &'static self, + string: Cow<'a, str>, + ) -> (Cow<'a, [u8]>, &'static Encoding, bool) { let to_cow_bytes = |string: Cow<'a, str>| match string { Cow::Owned(string) => Cow::Owned(string.into_bytes()), Cow::Borrowed(str) => Cow::Borrowed(str.as_bytes()),