@@ -998,15 +998,37 @@ impl VerifiedInvoiceRequest {
998
998
InvoiceRequestFields {
999
999
payer_signing_pubkey : * payer_signing_pubkey,
1000
1000
quantity : * quantity,
1001
- payer_note_truncated : payer_note. clone ( ) . map ( |mut s| {
1002
- s. truncate ( PAYER_NOTE_LIMIT ) ;
1003
- UntrustedString ( s)
1004
- } ) ,
1001
+ payer_note_truncated : payer_note
1002
+ . clone ( )
1003
+ // Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
1004
+ // down to the nearest valid UTF-8 code point boundary.
1005
+ . map ( |s| UntrustedString ( string_truncate_safe ( s, PAYER_NOTE_LIMIT ) ) ) ,
1005
1006
human_readable_name : self . offer_from_hrn ( ) . clone ( ) ,
1006
1007
}
1007
1008
}
1008
1009
}
1009
1010
1011
/// Shortens `s` to at most `new_len` bytes without ever cutting through a
/// multi-byte UTF-8 code point.
///
/// A plain `String::truncate(new_len)` panics when `new_len` lands inside a
/// code point, because the result would no longer be valid UTF-8. Here the cut
/// position is instead moved back to the closest code point boundary at or
/// before `new_len`, so the returned string is always valid UTF-8 and never
/// longer than `new_len` bytes. If `new_len` is at least `s.len()`, `s` is
/// returned unchanged.
///
/// Only code point boundaries are respected; a grapheme cluster may still be
/// split, which is considered acceptable. Respecting grapheme clusters would
/// require the `unicode-segmentation` crate and its large unicode tables.
fn string_truncate_safe(mut s: String, new_len: usize) -> String {
	// Nothing to cut when the requested length already covers the whole string.
	if new_len >= s.len() {
		return s;
	}

	// Walk backwards from `new_len` to the largest byte index that is a char
	// boundary. Index 0 is always a boundary, so the walk cannot underflow.
	// TODO(phlip9): remove when `std::str::floor_char_boundary` stabilizes.
	let mut cut = new_len;
	while !s.is_char_boundary(cut) {
		cut -= 1;
	}

	s.truncate(cut);
	s
}
1031
+
1010
1032
impl InvoiceRequestContents {
1011
1033
pub ( super ) fn metadata ( & self ) -> & [ u8 ] {
1012
1034
self . inner . metadata ( )
@@ -1426,6 +1448,7 @@ mod tests {
1426
1448
use crate :: ln:: inbound_payment:: ExpandedKey ;
1427
1449
use crate :: ln:: msgs:: { DecodeError , MAX_VALUE_MSAT } ;
1428
1450
use crate :: offers:: invoice:: { Bolt12Invoice , SIGNATURE_TAG as INVOICE_SIGNATURE_TAG } ;
1451
+ use crate :: offers:: invoice_request:: string_truncate_safe;
1429
1452
use crate :: offers:: merkle:: { self , SignatureTlvStreamRef , TaggedHash , TlvStream } ;
1430
1453
use crate :: offers:: nonce:: Nonce ;
1431
1454
#[ cfg( not( c_bindings) ) ]
@@ -2947,14 +2970,20 @@ mod tests {
2947
2970
. unwrap ( ) ;
2948
2971
assert_eq ! ( offer. issuer_signing_pubkey( ) , Some ( node_id) ) ;
2949
2972
2973
+ // UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
2974
+ // because it would split a multi-byte UTF-8 code point.
2975
+ let payer_note = "❤️" . repeat ( 86 ) ;
2976
+ assert_eq ! ( payer_note. len( ) , PAYER_NOTE_LIMIT + 4 ) ;
2977
+ let expected_payer_note = "❤️" . repeat ( 85 ) ;
2978
+
2950
2979
let invoice_request = offer
2951
2980
. request_invoice ( & expanded_key, nonce, & secp_ctx, payment_id)
2952
2981
. unwrap ( )
2953
2982
. chain ( Network :: Testnet )
2954
2983
. unwrap ( )
2955
2984
. quantity ( 1 )
2956
2985
. unwrap ( )
2957
- . payer_note ( "0" . repeat ( PAYER_NOTE_LIMIT * 2 ) )
2986
+ . payer_note ( payer_note )
2958
2987
. build_and_sign ( )
2959
2988
. unwrap ( ) ;
2960
2989
match invoice_request. verify_using_metadata ( & expanded_key, & secp_ctx) {
@@ -2966,7 +2995,7 @@ mod tests {
2966
2995
InvoiceRequestFields {
2967
2996
payer_signing_pubkey: invoice_request. payer_signing_pubkey( ) ,
2968
2997
quantity: Some ( 1 ) ,
2969
- payer_note_truncated: Some ( UntrustedString ( "0" . repeat ( PAYER_NOTE_LIMIT ) ) ) ,
2998
+ payer_note_truncated: Some ( UntrustedString ( expected_payer_note ) ) ,
2970
2999
human_readable_name: None ,
2971
3000
}
2972
3001
) ;
@@ -2981,4 +3010,31 @@ mod tests {
2981
3010
Err ( _) => panic ! ( "unexpected error" ) ,
2982
3011
}
2983
3012
}
3013
+
3014
#[test]
fn test_string_truncate_safe() {
	// A heart followed by a variation selector: two 3-byte code points.
	//   ❤      variation-selector
	//   e29da4 efb88f
	// Cutting anywhere inside one of them must fall back to the previous
	// code point boundary.
	let heart = String::from("❤️");
	assert_eq!(heart.len(), 6);

	// (requested byte length, expected result)
	let cases: [(usize, &str); 8] = [
		(7, "❤️"),
		(6, "❤️"),
		(5, "❤"),
		(4, "❤"),
		(3, "❤"),
		(2, ""),
		(1, ""),
		(0, ""),
	];
	for &(new_len, expected) in cases.iter() {
		assert_eq!(expected, string_truncate_safe(heart.clone(), new_len));
	}

	// In an ASCII string every byte is its own code point, so truncation
	// behaves like plain byte slicing, clamped to the string's length.
	let ascii = String::from("my ASCII string!");
	for new_len in 0..(ascii.len() + 5) {
		let kept = new_len.min(ascii.len());
		assert_eq!(ascii[..kept], string_truncate_safe(ascii.clone(), new_len));
	}
}
2984
3040
}
0 commit comments