Skip to content

Commit e17c424

Browse files
committed
Emit stringview conversions in binary writer
So that we continue to produce valid stringref code. Parse the conversions as nops in the binary parser to preserve our ability to round-trip. Emitting the conversions requires using extra scratch locals, and between that and parsing the nop, there is quite a bit of code bloat when round-tripping, but stringref code should never be emitted in production use cases, so that's ok.
1 parent 0abf51c commit e17c424

File tree

6 files changed

+135
-42
lines changed

6 files changed

+135
-42
lines changed

src/parser/wat-parser.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,10 @@ Result<> doParseModule(Module& wasm, Lexer& input, bool allowExtra) {
202202
CHECK_ERR(im);
203203
}
204204
if (!f->imported()) {
205-
CHECK_ERR(ctx.irBuilder.visitEnd());
205+
auto end = ctx.irBuilder.visitEnd();
206+
if (auto* err = end.getErr()) {
207+
return ctx.in.err(decls.funcDefs[i].pos, err->msg);
208+
}
206209
}
207210
}
208211

src/wasm-binary.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,6 +1137,7 @@ enum ASTNodes {
11371137
StringEncodeLossyUTF8 = 0x8d,
11381138
StringEncodeWTF8 = 0x8e,
11391139
StringNewUTF8Try = 0x8f,
1140+
StringAsWTF16 = 0x98,
11401141
StringViewWTF16GetCodePoint = 0x9a,
11411142
StringViewWTF16Slice = 0x9c,
11421143
StringCompare = 0xa8,
@@ -1756,6 +1757,7 @@ class WasmBinaryReader {
17561757
bool maybeVisitArrayFill(Expression*& out, uint32_t code);
17571758
bool maybeVisitArrayInit(Expression*& out, uint32_t code);
17581759
bool maybeVisitStringNew(Expression*& out, uint32_t code);
1760+
bool maybeVisitStringAsWTF16(Expression*& out, uint32_t code);
17591761
bool maybeVisitStringConst(Expression*& out, uint32_t code);
17601762
bool maybeVisitStringMeasure(Expression*& out, uint32_t code);
17611763
bool maybeVisitStringEncode(Expression*& out, uint32_t code);

src/wasm/wasm-binary.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4240,6 +4240,9 @@ BinaryConsts::ASTNodes WasmBinaryReader::readExpression(Expression*& curr) {
42404240
if (maybeVisitStringNew(curr, opcode)) {
42414241
break;
42424242
}
4243+
if (maybeVisitStringAsWTF16(curr, opcode)) {
4244+
break;
4245+
}
42434246
if (maybeVisitStringConst(curr, opcode)) {
42444247
break;
42454248
}
@@ -7565,6 +7568,21 @@ bool WasmBinaryReader::maybeVisitStringNew(Expression*& out, uint32_t code) {
75657568
return true;
75667569
}
75677570

7571+
bool WasmBinaryReader::maybeVisitStringAsWTF16(Expression*& out,
7572+
uint32_t code) {
7573+
if (code != BinaryConsts::StringAsWTF16) {
7574+
return false;
7575+
}
7576+
// Parse `string.as_wtf16` as a nop. We do not support this instruction in the
7577+
// IR, but parsing it as a nop will ensure that the subsequent
7578+
// `stringview_wtf16.get_codeunit` or `stringview_wtf16.slice` will still
7579+
// receive the string value. We need to at least accept this in the binary
7580+
// parser because we emit `string.as_wtf16` as part of the binary encoding of
7581+
// those other two instructions.
7582+
out = Builder(wasm).makeNop();
7583+
return true;
7584+
}
7585+
75687586
bool WasmBinaryReader::maybeVisitStringConst(Expression*& out, uint32_t code) {
75697587
if (code != BinaryConsts::StringConst) {
75707588
return false;

src/wasm/wasm-stack.cpp

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2435,13 +2435,30 @@ void BinaryInstWriter::visitStringEq(StringEq* curr) {
24352435
}
24362436

24372437
void BinaryInstWriter::visitStringWTF16Get(StringWTF16Get* curr) {
2438+
// We need to convert the ref operand to a stringview, but it is under the pos
2439+
// operand. Put the i32 in a scratch local, emit the conversion, then get the
2440+
// i32 back onto the stack.
2441+
Index scratchPos = scratchLocals[Type::i32];
2442+
o << int8_t(BinaryConsts::LocalSet) << U32LEB(scratchPos);
2443+
o << int8_t(BinaryConsts::GCPrefix) << U32LEB(BinaryConsts::StringAsWTF16);
2444+
o << int8_t(BinaryConsts::LocalGet) << U32LEB(scratchPos);
24382445
o << int8_t(BinaryConsts::GCPrefix)
24392446
<< U32LEB(BinaryConsts::StringViewWTF16GetCodePoint);
24402447
}
24412448

24422449
void BinaryInstWriter::visitStringSliceWTF(StringSliceWTF* curr) {
2443-
o << int8_t(BinaryConsts::GCPrefix);
2444-
o << U32LEB(BinaryConsts::StringViewWTF16Slice);
2450+
// We need to convert the ref operand to a stringview, but it is buried under
2451+
// the start and end operands. Put the i32s in scratch locals, emit the
2452+
// conversion, then get the i32s back onto the stack.
2453+
Index scratchStart = scratchLocals[Type::i32];
2454+
Index scratchEnd = scratchStart + 1;
2455+
o << int8_t(BinaryConsts::LocalSet) << U32LEB(scratchEnd);
2456+
o << int8_t(BinaryConsts::LocalSet) << U32LEB(scratchStart);
2457+
o << int8_t(BinaryConsts::GCPrefix) << U32LEB(BinaryConsts::StringAsWTF16);
2458+
o << int8_t(BinaryConsts::LocalGet) << U32LEB(scratchStart);
2459+
o << int8_t(BinaryConsts::LocalGet) << U32LEB(scratchEnd);
2460+
o << int8_t(BinaryConsts::GCPrefix)
2461+
<< U32LEB(BinaryConsts::StringViewWTF16Slice);
24452462
}
24462463

24472464
void BinaryInstWriter::visitContBind(ContBind* curr) {
@@ -2631,6 +2648,22 @@ InsertOrderedMap<Type, Index> BinaryInstWriter::countScratchLocals() {
26312648
count = std::max(count, 1u);
26322649
}
26332650
}
2651+
2652+
void visitStringWTF16Get(StringWTF16Get* curr) {
2653+
if (curr->type == Type::unreachable) {
2654+
return;
2655+
}
2656+
auto& count = scratches[Type::i32];
2657+
count = std::max(count, 1u);
2658+
}
2659+
2660+
void visitStringSliceWTF(StringSliceWTF* curr) {
2661+
if (curr->type == Type::unreachable) {
2662+
return;
2663+
}
2664+
auto& count = scratches[Type::i32];
2665+
count = std::max(count, 2u);
2666+
}
26342667
};
26352668

26362669
ScratchLocalFinder finder(*this);

test/lit/string.as_wtf16.wast

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@
44
;; into any IR.
55

66
;; RUN: wasm-opt %s -all -S -o - | filecheck %s
7+
;; RUN: wasm-opt %s -all --roundtrip -S -o - | filecheck %s --check-prefix=RTRIP
78

89
(module
910
;; CHECK: (func $empty (type $1)
1011
;; CHECK-NEXT: (nop)
1112
;; CHECK-NEXT: )
13+
;; RTRIP: (func $empty (type $1)
14+
;; RTRIP-NEXT: (nop)
15+
;; RTRIP-NEXT: )
1216
(func $empty
1317
(string.as_wtf16)
1418
)
@@ -19,7 +23,26 @@
1923
;; CHECK-NEXT: (i32.const 0)
2024
;; CHECK-NEXT: )
2125
;; CHECK-NEXT: )
26+
;; RTRIP: (func $codeunit (type $0) (result i32)
27+
;; RTRIP-NEXT: (local $0 i32)
28+
;; RTRIP-NEXT: (local $1 (ref string))
29+
;; RTRIP-NEXT: (stringview_wtf16.get_codeunit
30+
;; RTRIP-NEXT: (block (result (ref string))
31+
;; RTRIP-NEXT: (local.set $1
32+
;; RTRIP-NEXT: (string.const "abc")
33+
;; RTRIP-NEXT: )
34+
;; RTRIP-NEXT: (local.set $0
35+
;; RTRIP-NEXT: (i32.const 0)
36+
;; RTRIP-NEXT: )
37+
;; RTRIP-NEXT: (nop)
38+
;; RTRIP-NEXT: (local.get $1)
39+
;; RTRIP-NEXT: )
40+
;; RTRIP-NEXT: (local.get $0)
41+
;; RTRIP-NEXT: )
42+
;; RTRIP-NEXT: )
2243
(func $codeunit (result i32)
44+
;; This should parse ok with the conversion skipped. The roundtrip will
45+
;; include scratch locals.
2346
(stringview_wtf16.get_codeunit
2447
(string.as_wtf16
2548
(string.const "abc")
@@ -28,12 +51,65 @@
2851
)
2952
)
3053

54+
;; CHECK: (func $slice (type $2) (result stringref)
55+
;; CHECK-NEXT: (stringview_wtf16.slice
56+
;; CHECK-NEXT: (string.const "abc")
57+
;; CHECK-NEXT: (i32.const 1)
58+
;; CHECK-NEXT: (i32.const 2)
59+
;; CHECK-NEXT: )
60+
;; CHECK-NEXT: )
61+
;; RTRIP: (func $slice (type $2) (result stringref)
62+
;; RTRIP-NEXT: (local $0 i32)
63+
;; RTRIP-NEXT: (local $1 i32)
64+
;; RTRIP-NEXT: (local $2 i32)
65+
;; RTRIP-NEXT: (local $3 (ref string))
66+
;; RTRIP-NEXT: (stringview_wtf16.slice
67+
;; RTRIP-NEXT: (block (result (ref string))
68+
;; RTRIP-NEXT: (local.set $3
69+
;; RTRIP-NEXT: (string.const "abc")
70+
;; RTRIP-NEXT: )
71+
;; RTRIP-NEXT: (local.set $0
72+
;; RTRIP-NEXT: (block (result i32)
73+
;; RTRIP-NEXT: (local.set $2
74+
;; RTRIP-NEXT: (i32.const 1)
75+
;; RTRIP-NEXT: )
76+
;; RTRIP-NEXT: (local.set $1
77+
;; RTRIP-NEXT: (i32.const 2)
78+
;; RTRIP-NEXT: )
79+
;; RTRIP-NEXT: (local.get $2)
80+
;; RTRIP-NEXT: )
81+
;; RTRIP-NEXT: )
82+
;; RTRIP-NEXT: (nop)
83+
;; RTRIP-NEXT: (local.get $3)
84+
;; RTRIP-NEXT: )
85+
;; RTRIP-NEXT: (local.get $0)
86+
;; RTRIP-NEXT: (local.get $1)
87+
;; RTRIP-NEXT: )
88+
;; RTRIP-NEXT: )
89+
(func $slice (result stringref)
90+
;; This should parse ok with the conversion skipped. The roundtrip will
91+
;; include scratch locals.
92+
(stringview_wtf16.slice
93+
(string.as_wtf16
94+
(string.const "abc")
95+
)
96+
(i32.const 1)
97+
(i32.const 2)
98+
)
99+
)
100+
31101
;; CHECK: (func $length (type $0) (result i32)
32102
;; CHECK-NEXT: (string.measure_wtf16
33103
;; CHECK-NEXT: (string.const "abc")
34104
;; CHECK-NEXT: )
35105
;; CHECK-NEXT: )
106+
;; RTRIP: (func $length (type $0) (result i32)
107+
;; RTRIP-NEXT: (string.measure_wtf16
108+
;; RTRIP-NEXT: (string.const "abc")
109+
;; RTRIP-NEXT: )
110+
;; RTRIP-NEXT: )
36111
(func $length (result i32)
112+
;; This should be parsed as string.measure_wtf16 instead.
37113
(stringview_wtf16.length
38114
(string.as_wtf16
39115
(string.const "abc")

test/lit/strings.wast

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -300,45 +300,6 @@
300300
)
301301
)
302302

303-
;; CHECK: (func $stringview-access (type $0) (param $a stringref)
304-
;; CHECK-NEXT: (local $i32 i32)
305-
;; CHECK-NEXT: (local.set $i32
306-
;; CHECK-NEXT: (stringview_wtf16.get_codeunit
307-
;; CHECK-NEXT: (local.get $a)
308-
;; CHECK-NEXT: (i32.const 2)
309-
;; CHECK-NEXT: )
310-
;; CHECK-NEXT: )
311-
;; CHECK-NEXT: )
312-
(func $stringview-access
313-
(param $a stringref)
314-
(local $i32 i32)
315-
(local.set $i32 ;; validate the output type
316-
(stringview_wtf16.get_codeunit
317-
(local.get $a)
318-
(i32.const 2)
319-
)
320-
)
321-
)
322-
;; CHECK: (func $stringview-slice (type $0) (param $a stringref)
323-
;; CHECK-NEXT: (local.set $a
324-
;; CHECK-NEXT: (stringview_wtf16.slice
325-
;; CHECK-NEXT: (local.get $a)
326-
;; CHECK-NEXT: (i32.const 2)
327-
;; CHECK-NEXT: (i32.const 3)
328-
;; CHECK-NEXT: )
329-
;; CHECK-NEXT: )
330-
;; CHECK-NEXT: )
331-
(func $stringview-slice
332-
(param $a stringref)
333-
(local.set $a ;; validate the output type
334-
(stringview_wtf16.slice
335-
(local.get $a)
336-
(i32.const 2)
337-
(i32.const 3)
338-
)
339-
)
340-
)
341-
342303
;; CHECK: (func $string.new.gc (type $6) (param $array (ref $array)) (param $array16 (ref $array16))
343304
;; CHECK-NEXT: (drop
344305
;; CHECK-NEXT: (string.new_wtf16_array

0 commit comments

Comments
 (0)