Skip to content

Commit 6b0b417

Browse files
committed
Let unicode-table-generator fail gracefully for bitsets
The "Alphabetic" property in Unicode 14 grew too big for the bitset representation, causing a panic with "cannot pack 264 into 8 bits". However, the skiplist was already being chosen for that property anyway, so this doesn't need to be a hard failure. That panic is now a returned `Err`, and `emit_codepoints` automatically falls back to the skiplist when the bitset cannot be generated.
1 parent e159d42 commit 6b0b417

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

src/tools/unicode-table-generator/src/raw_emitter.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ impl RawEmitter {
2323
writeln!(&mut self.file).unwrap();
2424
}
2525

26-
fn emit_bitset(&mut self, ranges: &[Range<u32>]) {
26+
fn emit_bitset(&mut self, ranges: &[Range<u32>]) -> Result<(), String> {
2727
let last_code_point = ranges.last().unwrap().end;
2828
// bitset for every bit in the codepoint range
2929
//
@@ -44,7 +44,7 @@ impl RawEmitter {
4444
let unique_words =
4545
words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>();
4646
if unique_words.len() > u8::MAX as usize {
47-
panic!("cannot pack {} into 8 bits", unique_words.len());
47+
return Err(format!("cannot pack {} into 8 bits", unique_words.len()));
4848
}
4949
// needed for the chunk mapping to work
5050
assert_eq!(unique_words[0], 0, "has a zero word");
@@ -105,6 +105,8 @@ impl RawEmitter {
105105
writeln!(&mut self.file, " &BITSET_MAPPING,").unwrap();
106106
writeln!(&mut self.file, " )").unwrap();
107107
writeln!(&mut self.file, "}}").unwrap();
108+
109+
Ok(())
108110
}
109111

110112
fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
@@ -154,12 +156,12 @@ pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
154156
emitter.blank_line();
155157

156158
let mut bitset = emitter.clone();
157-
bitset.emit_bitset(&ranges);
159+
let bitset_ok = bitset.emit_bitset(&ranges).is_ok();
158160

159161
let mut skiplist = emitter.clone();
160162
skiplist.emit_skiplist(&ranges);
161163

162-
if bitset.bytes_used <= skiplist.bytes_used {
164+
if bitset_ok && bitset.bytes_used <= skiplist.bytes_used {
163165
*emitter = bitset;
164166
emitter.desc = String::from("bitset");
165167
} else {

0 commit comments

Comments
 (0)