Skip to content

Commit 2a83652

Browse files
MaxGraey authored and dcodeIO committed
Support tokenization of \x-escaped string sequences (#970)
1 parent 92a768f commit 2a83652

File tree

4 files changed

+3292
-3210
lines changed

4 files changed

+3292
-3210
lines changed

src/tokenizer.ts

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1146,6 +1146,9 @@ export class Tokenizer extends DiagnosticEmitter {
11461146
}
11471147
return this.readUnicodeEscape(); // \uDDDD
11481148
}
1149+
case CharCode.x: {
1150+
return this.readHexadecimalEscape(); // \xDD
1151+
}
11491152
case CharCode.CARRIAGERETURN: {
11501153
if (
11511154
this.pos < end &&
@@ -1237,21 +1240,18 @@ export class Tokenizer extends DiagnosticEmitter {
12371240
testInteger(): bool {
12381241
var end = this.end;
12391242
var text = this.source.text;
1240-
if (this.pos + 1 < end && text.charCodeAt(this.pos) == CharCode._0) {
1241-
switch (text.charCodeAt(this.pos + 2)) {
1243+
var pos = this.pos;
1244+
if (pos + 1 < end && text.charCodeAt(pos) == CharCode._0) {
1245+
switch (text.charCodeAt(pos + 2) | 32) {
12421246
case CharCode.x:
1243-
case CharCode.X:
12441247
case CharCode.b:
1245-
case CharCode.B:
1246-
case CharCode.o:
1247-
case CharCode.O: return true;
1248+
case CharCode.o: return true;
12481249
}
12491250
}
1250-
var pos = this.pos;
12511251
while (pos < end) {
12521252
let c = text.charCodeAt(pos);
1253-
if (c == CharCode.DOT || c == CharCode.e || c == CharCode.E) return false;
1254-
if ((c < CharCode._0 || c > CharCode._9) && c != CharCode._) break;
1253+
if (c == CharCode.DOT || (c | 32) == CharCode.e) return false;
1254+
if (c != CharCode._ && (c < CharCode._0 || c > CharCode._9)) break;
12551255
// does not validate separator placement (this is done in readXYInteger)
12561256
pos++;
12571257
}
@@ -1261,19 +1261,16 @@ export class Tokenizer extends DiagnosticEmitter {
12611261
readInteger(): I64 {
12621262
var text = this.source.text;
12631263
if (this.pos + 2 < this.end && text.charCodeAt(this.pos) == CharCode._0) {
1264-
switch (text.charCodeAt(this.pos + 1)) {
1265-
case CharCode.x:
1266-
case CharCode.X: {
1264+
switch (text.charCodeAt(this.pos + 1) | 32) {
1265+
case CharCode.x: {
12671266
this.pos += 2;
12681267
return this.readHexInteger();
12691268
}
1270-
case CharCode.b:
1271-
case CharCode.B: {
1269+
case CharCode.b: {
12721270
this.pos += 2;
12731271
return this.readBinaryInteger();
12741272
}
1275-
case CharCode.o:
1276-
case CharCode.O: {
1273+
case CharCode.o: {
12771274
this.pos += 2;
12781275
return this.readOctalInteger();
12791276
}
@@ -1517,7 +1514,7 @@ export class Tokenizer extends DiagnosticEmitter {
15171514
}
15181515
if (this.pos < end) {
15191516
let c = text.charCodeAt(this.pos);
1520-
if (c == CharCode.e || c == CharCode.E) {
1517+
if ((c | 32) == CharCode.e) {
15211518
if (
15221519
++this.pos < end &&
15231520
(c = text.charCodeAt(this.pos)) == CharCode.MINUS || c == CharCode.PLUS &&
@@ -1537,8 +1534,7 @@ export class Tokenizer extends DiagnosticEmitter {
15371534
throw new Error("not implemented"); // TBD
15381535
}
15391536

1540-
readUnicodeEscape(): string {
1541-
var remain = 4;
1537+
readHexadecimalEscape(remain: i32 = 2): string {
15421538
var value = 0;
15431539
var end = this.end;
15441540
var text = this.source.text;
@@ -1569,6 +1565,10 @@ export class Tokenizer extends DiagnosticEmitter {
15691565
return String.fromCharCode(value);
15701566
}
15711567

1568+
/** Reads a `\uDDDD` escape by delegating to `readHexadecimalEscape` with a digit count of 4. */
readUnicodeEscape(): string {
1569+
return this.readHexadecimalEscape(4);
1570+
}
1571+
15721572
private readExtendedUnicodeEscape(): string {
15731573
var start = this.pos;
15741574
var value = this.readHexInteger();
@@ -1603,11 +1603,11 @@ export class Tokenizer extends DiagnosticEmitter {
16031603
}
16041604

16051605
if (invalid) return "";
1606-
return value32 < 65536
1606+
return value32 < 0x10000
16071607
? String.fromCharCode(value32)
16081608
: String.fromCharCode(
1609-
((value32 - 65536) >>> 10) + 0xD800,
1610-
((value32 - 65536) & 1023) + 0xDC00
1609+
((value32 - 0x10000) >>> 10) | 0xD800,
1610+
((value32 - 0x10000) & 1023) | 0xDC00
16111611
);
16121612
}
16131613

0 commit comments

Comments
 (0)