Skip to content

Commit c625a42

Browse files
phlip9TheBlueMatt
authored andcommitted
offers: avoid panic when truncating payer_note in UTF-8 code point
`String::truncate` takes a byte index but panics if we split in the middle of a UTF-8 codepoint. Sadly, in `InvoiceRequest::fields` we want to tuncate the payer note to a maximum of 512 bytes, which may be in the middle of a UTF-8 codepoint and can cause panic. Here we iterate over the bytes in the string until we find one not in the middle of a UTF-8 codepoint and then split the string there.
1 parent 5dcd6c4 commit c625a42

File tree

1 file changed

+62
-6
lines changed

1 file changed

+62
-6
lines changed

lightning/src/offers/invoice_request.rs

+62-6
Original file line numberDiff line numberDiff line change
@@ -998,15 +998,37 @@ impl VerifiedInvoiceRequest {
998998
InvoiceRequestFields {
999999
payer_signing_pubkey: *payer_signing_pubkey,
10001000
quantity: *quantity,
1001-
payer_note_truncated: payer_note.clone().map(|mut s| {
1002-
s.truncate(PAYER_NOTE_LIMIT);
1003-
UntrustedString(s)
1004-
}),
1001+
payer_note_truncated: payer_note
1002+
.clone()
1003+
// Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
1004+
// down to the nearest valid UTF-8 code point boundary.
1005+
.map(|s| UntrustedString(string_truncate_safe(s, PAYER_NOTE_LIMIT))),
10051006
human_readable_name: self.offer_from_hrn().clone(),
10061007
}
10071008
}
10081009
}
10091010

1011+
/// `String::truncate(new_len)` panics if you split inside a UTF-8 code point,
1012+
/// which would leave the `String` containing invalid UTF-8. This function will
1013+
/// instead truncate the string to the next smaller code point boundary so the
1014+
/// truncated string always remains valid UTF-8.
1015+
///
1016+
/// This can still split a grapheme cluster, but that's probably fine.
1017+
/// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
1018+
/// unicode tables to find the next smaller grapheme cluster boundary.
1019+
fn string_truncate_safe(mut s: String, new_len: usize) -> String {
1020+
// Finds the largest byte index `x` not exceeding byte index `index` where
1021+
// `s.is_char_boundary(x)` is true.
1022+
// TODO(phlip9): remove when `std::str::floor_char_boundary` stabilizes.
1023+
let truncated_len = if new_len >= s.len() {
1024+
s.len()
1025+
} else {
1026+
(0..=new_len).rev().find(|idx| s.is_char_boundary(*idx)).unwrap_or(0)
1027+
};
1028+
s.truncate(truncated_len);
1029+
s
1030+
}
1031+
10101032
impl InvoiceRequestContents {
10111033
pub(super) fn metadata(&self) -> &[u8] {
10121034
self.inner.metadata()
@@ -1426,6 +1448,7 @@ mod tests {
14261448
use crate::ln::inbound_payment::ExpandedKey;
14271449
use crate::ln::msgs::{DecodeError, MAX_VALUE_MSAT};
14281450
use crate::offers::invoice::{Bolt12Invoice, SIGNATURE_TAG as INVOICE_SIGNATURE_TAG};
1451+
use crate::offers::invoice_request::string_truncate_safe;
14291452
use crate::offers::merkle::{self, SignatureTlvStreamRef, TaggedHash, TlvStream};
14301453
use crate::offers::nonce::Nonce;
14311454
#[cfg(not(c_bindings))]
@@ -2947,14 +2970,20 @@ mod tests {
29472970
.unwrap();
29482971
assert_eq!(offer.issuer_signing_pubkey(), Some(node_id));
29492972

2973+
// UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
2974+
// because it would split a multi-byte UTF-8 code point.
2975+
let payer_note = "❤️".repeat(86);
2976+
assert_eq!(payer_note.len(), PAYER_NOTE_LIMIT + 4);
2977+
let expected_payer_note = "❤️".repeat(85);
2978+
29502979
let invoice_request = offer
29512980
.request_invoice(&expanded_key, nonce, &secp_ctx, payment_id)
29522981
.unwrap()
29532982
.chain(Network::Testnet)
29542983
.unwrap()
29552984
.quantity(1)
29562985
.unwrap()
2957-
.payer_note("0".repeat(PAYER_NOTE_LIMIT * 2))
2986+
.payer_note(payer_note)
29582987
.build_and_sign()
29592988
.unwrap();
29602989
match invoice_request.verify_using_metadata(&expanded_key, &secp_ctx) {
@@ -2966,7 +2995,7 @@ mod tests {
29662995
InvoiceRequestFields {
29672996
payer_signing_pubkey: invoice_request.payer_signing_pubkey(),
29682997
quantity: Some(1),
2969-
payer_note_truncated: Some(UntrustedString("0".repeat(PAYER_NOTE_LIMIT))),
2998+
payer_note_truncated: Some(UntrustedString(expected_payer_note)),
29702999
human_readable_name: None,
29713000
}
29723001
);
@@ -2981,4 +3010,31 @@ mod tests {
29813010
Err(_) => panic!("unexpected error"),
29823011
}
29833012
}
3013+
3014+
#[test]
3015+
fn test_string_truncate_safe() {
3016+
// We'll correctly truncate to the nearest UTF-8 code point boundary:
3017+
// ❤ variation-selector
3018+
// e29da4 efb88f
3019+
let s = String::from("❤️");
3020+
assert_eq!(s.len(), 6);
3021+
assert_eq!(s, string_truncate_safe(s.clone(), 7));
3022+
assert_eq!(s, string_truncate_safe(s.clone(), 6));
3023+
assert_eq!("❤", string_truncate_safe(s.clone(), 5));
3024+
assert_eq!("❤", string_truncate_safe(s.clone(), 4));
3025+
assert_eq!("❤", string_truncate_safe(s.clone(), 3));
3026+
assert_eq!("", string_truncate_safe(s.clone(), 2));
3027+
assert_eq!("", string_truncate_safe(s.clone(), 1));
3028+
assert_eq!("", string_truncate_safe(s.clone(), 0));
3029+
3030+
// Every byte in an ASCII string is also a full UTF-8 code point.
3031+
let s = String::from("my ASCII string!");
3032+
for new_len in 0..(s.len() + 5) {
3033+
if new_len >= s.len() {
3034+
assert_eq!(s, string_truncate_safe(s.clone(), new_len));
3035+
} else {
3036+
assert_eq!(s[..new_len], string_truncate_safe(s.clone(), new_len));
3037+
}
3038+
}
3039+
}
29843040
}

0 commit comments

Comments
 (0)