Skip to content

Commit cfd03f3

Browse files
Don't write 0 to the end of strings in embind (#10844)
This is unsafe because it's one past the end of the string, and in threaded contexts it can cause data races
1 parent dcb4112 commit cfd03f3

File tree

2 files changed

+18
-33
lines changed

2 files changed

+18
-33
lines changed

src/embind/embind.js

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -635,20 +635,13 @@ var LibraryEmbind = {
635635

636636
var str;
637637
if (stdStringIsUTF8) {
638-
//ensure null termination at one-past-end byte if not present yet
639-
var endChar = HEAPU8[value + 4 + length];
640-
var endCharSwap = 0;
641-
if (endChar != 0) {
642-
endCharSwap = endChar;
643-
HEAPU8[value + 4 + length] = 0;
644-
}
645-
646638
var decodeStartPtr = value + 4;
647639
// Looping here to support possible embedded '0' bytes
648640
for (var i = 0; i <= length; ++i) {
649641
var currentBytePtr = value + 4 + i;
650-
if (HEAPU8[currentBytePtr] == 0) {
651-
var stringSegment = UTF8ToString(decodeStartPtr);
642+
if (HEAPU8[currentBytePtr] == 0 || i == length) {
643+
var maxRead = currentBytePtr - decodeStartPtr;
644+
var stringSegment = UTF8ToString(decodeStartPtr, maxRead);
652645
if (str === undefined) {
653646
str = stringSegment;
654647
} else {
@@ -658,10 +651,6 @@ var LibraryEmbind = {
658651
decodeStartPtr = currentBytePtr + 1;
659652
}
660653
}
661-
662-
if (endCharSwap != 0) {
663-
HEAPU8[value + 4 + length] = endCharSwap;
664-
}
665654
} else {
666655
var a = new Array(length);
667656
for (var i = 0; i < length; ++i) {
@@ -754,20 +743,14 @@ var LibraryEmbind = {
754743
var length = HEAPU32[value >> 2];
755744
var HEAP = getHeap();
756745
var str;
757-
// Ensure null termination at one-past-end byte if not present yet
758-
var endChar = HEAP[(value + 4 + length * charSize) >> shift];
759-
var endCharSwap = 0;
760-
if (endChar != 0) {
761-
endCharSwap = endChar;
762-
HEAP[(value + 4 + length * charSize) >> shift] = 0;
763-
}
764746

765747
var decodeStartPtr = value + 4;
766748
// Looping here to support possible embedded '0' bytes
767749
for (var i = 0; i <= length; ++i) {
768750
var currentBytePtr = value + 4 + i * charSize;
769-
if (HEAP[currentBytePtr >> shift] == 0) {
770-
var stringSegment = decodeString(decodeStartPtr);
751+
if (HEAP[currentBytePtr >> shift] == 0 || i == length) {
752+
var maxReadBytes = currentBytePtr - decodeStartPtr;
753+
var stringSegment = decodeString(decodeStartPtr, maxReadBytes);
771754
if (str === undefined) {
772755
str = stringSegment;
773756
} else {
@@ -778,10 +761,6 @@ var LibraryEmbind = {
778761
}
779762
}
780763

781-
if (endCharSwap != 0) {
782-
HEAP[(value + 4 + length * charSize) >> shift] = endCharSwap;
783-
}
784-
785764
_free(value);
786765

787766
return str;

src/runtime_strings_extra.js

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ var UTF16Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-16l
3939
#endif // TEXTDECODER
4040
#endif // TEXTDECODER == 2
4141

42-
function UTF16ToString(ptr) {
42+
function UTF16ToString(ptr, maxBytesToRead) {
4343
#if ASSERTIONS
4444
assert(ptr % 2 == 0, 'Pointer passed to UTF16ToString must be aligned to two bytes!');
4545
#endif
@@ -48,7 +48,10 @@ function UTF16ToString(ptr) {
4848
// TextDecoder needs to know the byte length in advance, it doesn't stop on null terminator by itself.
4949
// Also, use the length info to avoid running tiny strings through TextDecoder, since .subarray() allocates garbage.
5050
var idx = endPtr >> 1;
51-
while (HEAP16[idx]) ++idx;
51+
var maxIdx = idx + maxBytesToRead / 2;
52+
// If maxBytesToRead is not passed explicitly, it will be undefined, and this
53+
// will always evaluate to true. This saves on code size.
54+
while (!(idx >= maxIdx) && HEAPU16[idx]) ++idx;
5255
endPtr = idx << 1;
5356

5457
#if TEXTDECODER != 2
@@ -64,7 +67,7 @@ function UTF16ToString(ptr) {
6467
var str = '';
6568
while (1) {
6669
var codeUnit = {{{ makeGetValue('ptr', 'i*2', 'i16') }}};
67-
if (codeUnit == 0) return str;
70+
if (codeUnit == 0 || i == maxBytesToRead / 2) return str;
6871
++i;
6972
// fromCharCode constructs a character from a UTF-16 code unit, so we can pass the UTF16 string right through.
7073
str += String.fromCharCode(codeUnit);
@@ -117,16 +120,18 @@ function lengthBytesUTF16(str) {
117120
return str.length*2;
118121
}
119122

120-
function UTF32ToString(ptr) {
123+
function UTF32ToString(ptr, maxBytesToRead) {
121124
#if ASSERTIONS
122125
assert(ptr % 4 == 0, 'Pointer passed to UTF32ToString must be aligned to four bytes!');
123126
#endif
124127
var i = 0;
125128

126129
var str = '';
127-
while (1) {
130+
// If maxBytesToRead is not passed explicitly, it will be undefined, and this
131+
// will always evaluate to true. This saves on code size.
132+
while (!(i >= maxBytesToRead / 4)) {
128133
var utf32 = {{{ makeGetValue('ptr', 'i*4', 'i32') }}};
129-
if (utf32 == 0) return str;
134+
if (utf32 == 0) break;
130135
++i;
131136
// Gotcha: fromCharCode constructs a character from a UTF-16 encoded code (pair), not from a Unicode code point! So encode the code point to UTF-16 for constructing.
132137
// See http://unicode.org/faq/utf_bom.html#utf16-3
@@ -137,6 +142,7 @@ function UTF32ToString(ptr) {
137142
str += String.fromCharCode(utf32);
138143
}
139144
}
145+
return str;
140146
}
141147

142148
// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr',

0 commit comments

Comments
 (0)