Skip to content

Commit d9490e3

Browse files
committed
src: support UTF-8 in compiled-in JS source files
Detect when source files in lib/ are not ASCII. Decode them as UTF-8 and store them as UTF-16 in the binary so they can be used as external string resources without non-ASCII characters getting mangled.
1 parent 4e259b2 commit d9490e3

File tree

2 files changed

+56
-21
lines changed

2 files changed

+56
-21
lines changed

src/node_javascript.cc

Lines changed: 38 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,29 +6,53 @@
66

77
namespace node {
88

9+
using v8::Isolate;
910
using v8::Local;
11+
using v8::MaybeLocal;
1012
using v8::NewStringType;
1113
using v8::Object;
1214
using v8::String;
1315

16+
template <typename T, size_t N, T P>
17+
struct ExternalStringResource;
18+
19+
template <size_t N, const char (&P)[N]>
20+
struct ExternalStringResource<const char[N], N, P>
21+
: public String::ExternalOneByteStringResource {
22+
const char* data() const override { return P; }
23+
size_t length() const override { return N; }
24+
void Dispose() override { /* Default calls `delete this`. */ }
25+
};
26+
27+
template <size_t N, const uint16_t (&P)[N]>
28+
struct ExternalStringResource<const uint16_t[N], N, P>
29+
: public String::ExternalStringResource {
30+
const uint16_t* data() const override { return P; }
31+
size_t length() const override { return N; }
32+
void Dispose() override { /* Default calls `delete this`. */ }
33+
};
34+
1435
// id##_data is defined in node_natives.h.
15-
#define V(id) \
16-
static struct : public String::ExternalOneByteStringResource { \
17-
const char* data() const override { \
18-
return reinterpret_cast<const char*>(id##_data); \
19-
} \
20-
size_t length() const override { return sizeof(id##_data); } \
21-
void Dispose() override { /* Default calls `delete this`. */ } \
22-
} id##_external_data;
36+
#define V(id) \
37+
static ExternalStringResource<decltype(id##_data), \
38+
arraysize(id##_data), \
39+
id##_data> id##_external_data;
2340
NODE_NATIVES_MAP(V)
2441
#undef V
2542

43+
inline MaybeLocal<String>
44+
ToExternal(Isolate* isolate, String::ExternalOneByteStringResource* that) {
45+
return String::NewExternalOneByte(isolate, that);
46+
}
47+
48+
inline MaybeLocal<String>
49+
ToExternal(Isolate* isolate, String::ExternalStringResource* that) {
50+
return String::NewExternalTwoByte(isolate, that);
51+
}
52+
2653
Local<String> MainSource(Environment* env) {
27-
auto maybe_string =
28-
String::NewExternalOneByte(
29-
env->isolate(),
30-
&internal_bootstrap_node_external_data);
31-
return maybe_string.ToLocalChecked();
54+
return ToExternal(env->isolate(),
55+
&internal_bootstrap_node_external_data).ToLocalChecked();
3256
}
3357

3458
void DefineJavaScript(Environment* env, Local<Object> target) {
@@ -40,8 +64,7 @@ void DefineJavaScript(Environment* env, Local<Object> target) {
4064
env->isolate(), id##_name, NewStringType::kNormal, \
4165
sizeof(id##_name)).ToLocalChecked(); \
4266
auto value = \
43-
String::NewExternalOneByte( \
44-
env->isolate(), &id##_external_data).ToLocalChecked(); \
67+
ToExternal(env->isolate(), &id##_external_data).ToLocalChecked(); \
4568
CHECK(target->Set(context, key, value).FromJust()); \
4669
} while (0);
4770
NODE_NATIVES_MAP(V)

tools/js2c.py

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,16 @@
3737
import string
3838

3939

40-
def ToCString(contents):
41-
step = 20
42-
slices = (contents[i:i+step] for i in xrange(0, len(contents), step))
43-
slices = map(lambda s: ','.join(str(ord(c)) for c in s), slices)
40+
def ToCArray(elements, step=10):
41+
slices = (elements[i:i+step] for i in xrange(0, len(elements), step))
42+
slices = map(lambda s: ','.join(str(x) for x in s), slices)
4443
return ',\n'.join(slices)
4544

4645

46+
def ToCString(contents):
47+
return ToCArray(map(ord, contents), step=20)
48+
49+
4750
def ReadFile(filename):
4851
file = open(filename, "rt")
4952
try:
@@ -186,7 +189,7 @@ def ReadMacros(lines):
186189
SOURCES = """\
187190
static const uint8_t {escaped_id}_name[] = {{
188191
{name}}};
189-
static const uint8_t {escaped_id}_data[] = {{
192+
static const {ctype} {escaped_id}_data[] = {{
190193
{data}}};
191194
"""
192195

@@ -214,7 +217,16 @@ def JS2C(source, target):
214217
lines = ReadFile(str(s))
215218
lines = ExpandConstants(lines, consts)
216219
lines = ExpandMacros(lines, macros)
217-
data = ToCString(lines)
220+
221+
# Treat non-ASCII as UTF-8 and convert it to UTF-16.
222+
if any(ord(c) > 127 for c in lines):
223+
ctype = 'uint16_t'
224+
data = map(ord, lines.decode('utf-8').encode('utf-16be'))
225+
data = [data[i] * 256 + data[i+1] for i in xrange(0, len(data), 2)]
226+
data = ToCArray(data)
227+
else:
228+
ctype = 'char'
229+
data = ToCString(lines)
218230

219231
# On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar"
220232
# so don't assume there is always a slash in the file path.

0 commit comments

Comments
 (0)