Skip to content

Commit fe3e8d1

Browse files
committed
string compression: save a few bits per string
The length was always stored as a 16-bit number, but most translations have a maximum length far smaller than that. For example, US English translation lengths always fit in just 8 bits, and probably all languages fit in 9 bits. This also has the side effect of reducing the alignment of compressed_string_t from 2 bytes to 1. Testing performed: ran in German and English on pyruler; printed messages looked right. Firmware size, en_US: before, 3044 bytes free in flash; after, 3408 bytes free in flash. Firmware size, de_DE (with adafruit#2967 merged to restore translations): before, 1236 bytes free in flash; after, 1600 bytes free in flash.
1 parent 0db8b88 commit fe3e8d1

File tree

8 files changed

+74
-27
lines changed

8 files changed

+74
-27
lines changed

main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ bool maybe_run_list(const char ** filenames, pyexec_result_t* exec_result) {
185185
}
186186
mp_hal_stdout_tx_str(filename);
187187
const compressed_string_t* compressed = translate(" output:\n");
188-
char decompressed[compressed->length];
188+
char decompressed[decompress_length(compressed)];
189189
decompress(compressed, decompressed);
190190
mp_hal_stdout_tx_str(decompressed);
191191
pyexec_file(filename, exec_result);

py/builtinhelp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ STATIC void mp_help_print_modules(void) {
135135

136136
// let the user know there may be other modules available from the filesystem
137137
const compressed_string_t* compressed = translate("Plus any modules on the filesystem\n");
138-
char decompressed[compressed->length];
138+
char decompressed[decompress_length(compressed)];
139139
decompress(compressed, decompressed);
140140
mp_print_str(MP_PYTHON_PRINTER, decompressed);
141141
}
@@ -181,7 +181,7 @@ STATIC mp_obj_t mp_builtin_help(size_t n_args, const mp_obj_t *args) {
181181
// print a general help message. Translate only works on single strings on one line.
182182
const compressed_string_t* compressed =
183183
translate("Welcome to Adafruit CircuitPython %s!\n\nPlease visit learn.adafruit.com/category/circuitpython for project guides.\n\nTo list built-in modules please do `help(\"modules\")`.\n");
184-
char decompressed[compressed->length];
184+
char decompressed[decompress_length(compressed)];
185185
decompress(compressed, decompressed);
186186
mp_printf(MP_PYTHON_PRINTER, decompressed, MICROPY_GIT_TAG);
187187
} else {

py/makeqstrdata.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Process raw qstr file and output qstr data with length, hash and data bytes.
33
4-
This script works with Python 2.6, 2.7, 3.3 and 3.4.
4+
This script works with Python 2.7, 3.3 and 3.4.
55
"""
66

77
from __future__ import print_function
@@ -132,19 +132,37 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
132132
print("// estimated total memory size", len(lengths) + 2*len(values) + sum(len(cb[u]) for u in all_strings_concat))
133133
print("//", values, lengths)
134134
values_type = "uint16_t" if max(ord(u) for u in values) > 255 else "uint8_t"
135+
max_translation_encoded_length = max(len(translation.encode("utf-8")) for original,translation in translations)
135136
with open(compression_filename, "w") as f:
136137
f.write("const uint8_t lengths[] = {{ {} }};\n".format(", ".join(map(str, lengths))))
137138
f.write("const {} values[] = {{ {} }};\n".format(values_type, ", ".join(str(ord(u)) for u in values)))
139+
f.write("#define compress_max_length_bits ({})\n".format(max_translation_encoded_length.bit_length()))
138140
return values, lengths
139141

140-
def decompress(encoding_table, length, encoded):
142+
def decompress(encoding_table, encoded, encoded_length_bits):
141143
values, lengths = encoding_table
142-
#print(l, encoded)
143144
dec = []
144145
this_byte = 0
145146
this_bit = 7
146147
b = encoded[this_byte]
147-
for i in range(length):
148+
bits = 0
149+
for i in range(encoded_length_bits):
150+
bits <<= 1
151+
if 0x80 & b:
152+
bits |= 1
153+
154+
b <<= 1
155+
if this_bit == 0:
156+
this_bit = 7
157+
this_byte += 1
158+
if this_byte < len(encoded):
159+
b = encoded[this_byte]
160+
else:
161+
this_bit -= 1
162+
length = bits
163+
164+
i = 0
165+
while i < length:
148166
bits = 0
149167
bit_length = 0
150168
max_code = lengths[0]
@@ -170,10 +188,11 @@ def decompress(encoding_table, length, encoded):
170188
searched_length += lengths[bit_length]
171189

172190
v = values[searched_length + bits - max_code]
191+
i += len(v.encode('utf-8'))
173192
dec.append(v)
174193
return ''.join(dec)
175194

176-
def compress(encoding_table, decompressed):
195+
def compress(encoding_table, decompressed, encoded_length_bits, len_translation_encoded):
177196
if not isinstance(decompressed, str):
178197
raise TypeError()
179198
values, lengths = encoding_table
@@ -182,6 +201,19 @@ def compress(encoding_table, decompressed):
182201
#print(lengths)
183202
current_bit = 7
184203
current_byte = 0
204+
205+
code = len_translation_encoded
206+
bits = encoded_length_bits+1
207+
for i in range(bits - 1, 0, -1):
208+
if len_translation_encoded & (1 << (i - 1)):
209+
enc[current_byte] |= 1 << current_bit
210+
if current_bit == 0:
211+
current_bit = 7
212+
#print("packed {0:0{width}b}".format(enc[current_byte], width=8))
213+
current_byte += 1
214+
else:
215+
current_bit -= 1
216+
185217
for c in decompressed:
186218
#print()
187219
#print("char", c, values.index(c))
@@ -342,14 +374,17 @@ def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns):
342374

343375
total_text_size = 0
344376
total_text_compressed_size = 0
377+
max_translation_encoded_length = max(len(translation.encode("utf-8")) for original, translation in i18ns)
378+
encoded_length_bits = max_translation_encoded_length.bit_length()
345379
for original, translation in i18ns:
346380
translation_encoded = translation.encode("utf-8")
347-
compressed = compress(encoding_table, translation)
381+
compressed = compress(encoding_table, translation, encoded_length_bits, len(translation_encoded))
348382
total_text_compressed_size += len(compressed)
349-
decompressed = decompress(encoding_table, len(translation_encoded), compressed)
383+
decompressed = decompress(encoding_table, compressed, encoded_length_bits)
384+
assert decompressed == translation
350385
for c in C_ESCAPES:
351386
decompressed = decompressed.replace(c, C_ESCAPES[c])
352-
print("TRANSLATION(\"{}\", {}, {{ {} }}) // {}".format(original, len(translation_encoded)+1, ", ".join(["0x{:02x}".format(x) for x in compressed]), decompressed))
387+
print("TRANSLATION(\"{}\", {}) // {}".format(original, ", ".join(["{:d}".format(x) for x in compressed]), decompressed))
353388
total_text_size += len(translation.encode("utf-8"))
354389

355390
print()
@@ -385,6 +420,7 @@ def print_qstr_enums(qstrs):
385420

386421
qcfgs, qstrs, i18ns = parse_input_headers(args.infiles)
387422
if args.translation:
423+
i18ns = sorted(i18ns)
388424
translations = translate(args.translation, i18ns)
389425
encoding_table = compute_huffman_coding(translations, qstrs, args.compression_filename)
390426
print_qstr_data(encoding_table, qcfgs, qstrs, translations)

py/moduerrno.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ const char *mp_common_errno_to_str(mp_obj_t errno_val, char *buf, size_t len) {
158158
case ENOSPC: desc = translate("No space left on device"); break;
159159
case EROFS: desc = translate("Read-only filesystem"); break;
160160
}
161-
if (desc != NULL && desc->length <= len) {
161+
if (desc != NULL && decompress_length(desc) <= len) {
162162
decompress(desc, buf);
163163
return buf;
164164
}

py/obj.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,17 @@ void mp_obj_print_exception(const mp_print_t *print, mp_obj_t exc) {
9494
assert(n % 3 == 0);
9595
// Decompress the format strings
9696
const compressed_string_t* traceback = translate("Traceback (most recent call last):\n");
97-
char decompressed[traceback->length];
97+
char decompressed[decompress_length(traceback)];
9898
decompress(traceback, decompressed);
9999
#if MICROPY_ENABLE_SOURCE_LINE
100100
const compressed_string_t* frame = translate(" File \"%q\", line %d");
101101
#else
102102
const compressed_string_t* frame = translate(" File \"%q\"");
103103
#endif
104-
char decompressed_frame[frame->length];
104+
char decompressed_frame[decompress_length(frame)];
105105
decompress(frame, decompressed_frame);
106106
const compressed_string_t* block_fmt = translate(", in %q\n");
107-
char decompressed_block[block_fmt->length];
107+
char decompressed_block[decompress_length(block_fmt)];
108108
decompress(block_fmt, decompressed_block);
109109

110110
// Print the traceback

py/objexcept.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ mp_obj_t mp_obj_new_exception_msg_vlist(const mp_obj_type_t *exc_type, const com
400400

401401
// Try to allocate memory for the message
402402
mp_obj_str_t *o_str = m_new_obj_maybe(mp_obj_str_t);
403-
size_t o_str_alloc = fmt->length + 1;
403+
size_t o_str_alloc = decompress_length(fmt);
404404
byte *o_str_buf = m_new_maybe(byte, o_str_alloc);
405405

406406
bool used_emg_buf = false;
@@ -433,7 +433,7 @@ mp_obj_t mp_obj_new_exception_msg_vlist(const mp_obj_type_t *exc_type, const com
433433
// We have some memory to format the string
434434
struct _exc_printer_t exc_pr = {!used_emg_buf, o_str_alloc, 0, o_str_buf};
435435
mp_print_t print = {&exc_pr, exc_add_strn};
436-
char fmt_decompressed[fmt->length];
436+
char fmt_decompressed[decompress_length(fmt)];
437437
decompress(fmt, fmt_decompressed);
438438
mp_vprintf(&print, fmt_decompressed, ap);
439439
exc_pr.buf[exc_pr.len] = '\0';

supervisor/shared/translate.c

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
#include "supervisor/serial.h"
3838

3939
void serial_write_compressed(const compressed_string_t* compressed) {
40-
char decompressed[compressed->length];
40+
char decompressed[decompress_length(compressed)];
4141
decompress(compressed, decompressed);
4242
serial_write(decompressed);
4343
}
@@ -58,12 +58,22 @@ STATIC int put_utf8(char *buf, int u) {
5858
}
5959
}
6060

61+
uint16_t decompress_length(const compressed_string_t* compressed) {
62+
if (compress_max_length_bits <= 8) {
63+
return 1 + (compressed->data >> (8 - compress_max_length_bits));
64+
} else {
65+
return 1 + ((compressed->data * 256 + compressed->tail[0]) >> (16 - compress_max_length_bits));
66+
}
67+
}
68+
6169
char* decompress(const compressed_string_t* compressed, char* decompressed) {
62-
uint8_t this_byte = 0;
63-
uint8_t this_bit = 7;
64-
uint8_t b = compressed->data[this_byte];
70+
uint8_t this_byte = compress_max_length_bits / 8;
71+
uint8_t this_bit = 7 - compress_max_length_bits % 8;
72+
uint8_t b = (&compressed->data)[this_byte];
73+
uint16_t length = decompress_length(compressed);
74+
6575
// Stop one early because the last byte is always NULL.
66-
for (uint16_t i = 0; i < compressed->length - 1;) {
76+
for (uint16_t i = 0; i < length - 1;) {
6777
uint32_t bits = 0;
6878
uint8_t bit_length = 0;
6979
uint32_t max_code = lengths[0];
@@ -78,7 +88,7 @@ char* decompress(const compressed_string_t* compressed, char* decompressed) {
7888
if (this_bit == 0) {
7989
this_bit = 7;
8090
this_byte += 1;
81-
b = compressed->data[this_byte]; // This may read past the end but its never used.
91+
b = (&compressed->data)[this_byte]; // This may read past the end but its never used.
8292
} else {
8393
this_bit -= 1;
8494
}
@@ -91,14 +101,14 @@ char* decompress(const compressed_string_t* compressed, char* decompressed) {
91101
i += put_utf8(decompressed + i, values[searched_length + bits - max_code]);
92102
}
93103

94-
decompressed[compressed->length-1] = '\0';
104+
decompressed[length-1] = '\0';
95105
return decompressed;
96106
}
97107

98108
inline __attribute__((always_inline)) const compressed_string_t* translate(const char* original) {
99109
#ifndef NO_QSTR
100110
#define QDEF(id, str)
101-
#define TRANSLATION(id, len, compressed...) if (strcmp(original, id) == 0) { static const compressed_string_t v = {.length = len, .data = compressed}; return &v; } else
111+
#define TRANSLATION(id, firstbyte, ...) if (strcmp(original, id) == 0) { static const compressed_string_t v = { .data = firstbyte, .tail = { __VA_ARGS__ } }; return &v; } else
102112
#include "genhdr/qstrdefs.generated.h"
103113
#undef TRANSLATION
104114
#undef QDEF

supervisor/shared/translate.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,13 @@
3030
#include <stdint.h>
3131

3232
typedef struct {
33-
uint16_t length;
34-
const uint8_t data[];
33+
uint8_t data;
34+
const uint8_t tail[];
3535
} compressed_string_t;
3636

3737
const compressed_string_t* translate(const char* c);
3838
void serial_write_compressed(const compressed_string_t* compressed);
3939
char* decompress(const compressed_string_t* compressed, char* decompressed);
40+
uint16_t decompress_length(const compressed_string_t* compressed);
4041

4142
#endif // MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H

0 commit comments

Comments
 (0)