Skip to content

Commit 2e62d25

Browse files
Apply Suggested Changes
1 parent 603c3cf commit 2e62d25

5 files changed

+84
-42
lines changed

src/compiler/scanner.ts

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1556,9 +1556,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15561556
tokenFlags |= TokenFlags.ContainsInvalidEscape;
15571557
if (isRegularExpression || shouldEmitInvalidEscapeError) {
15581558
const code = parseInt(text.substring(start + 1, pos), 8);
1559-
if (isRegularExpression !== "annex-b") {
1560-
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
1561-
}
1559+
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
15621560
return String.fromCharCode(code);
15631561
}
15641562
return text.substring(start, pos);
@@ -2426,6 +2424,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24262424
// Quickly get to the end of regex such that we know the flags
24272425
let p = tokenStart + 1;
24282426
let inEscape = false;
2427+
let namedCaptureGroups = false;
24292428
// Although nested character classes are allowed in Unicode Sets mode,
24302429
// an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
24312430
// Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
@@ -2469,6 +2468,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24692468
else if (ch === CharacterCodes.closeBracket) {
24702469
inCharacterClass = false;
24712470
}
2471+
else if (
2472+
ch === CharacterCodes.openParen
2473+
&& charCodeUnchecked(p + 1) === CharacterCodes.question
2474+
&& charCodeUnchecked(p + 2) === CharacterCodes.lessThan
2475+
&& charCodeUnchecked(p + 3) !== CharacterCodes.equals
2476+
&& charCodeUnchecked(p + 3) !== CharacterCodes.exclamation
2477+
) {
2478+
namedCaptureGroups = true;
2479+
}
24722480
p++;
24732481
}
24742482
const isUnterminated = !!(tokenFlags & TokenFlags.Unterminated);
@@ -2505,7 +2513,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25052513
const saveEnd = end;
25062514
pos = tokenStart + 1;
25072515
end = endOfBody;
2508-
scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true);
2516+
scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true, namedCaptureGroups);
25092517
tokenStart = saveTokenStart;
25102518
tokenFlags = saveTokenFlags;
25112519
pos = savePos;
@@ -2517,7 +2525,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25172525
return token;
25182526
}
25192527

2520-
function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) {
2528+
function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean, namedCaptureGroups: boolean) {
25212529
// Why var? It avoids TDZ checks in the runtime which can be costly.
25222530
// See: https://github.com/microsoft/TypeScript/issues/52924
25232531
/* eslint-disable no-var */
@@ -2527,10 +2535,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25272535
/** Grammar parameter */
25282536
var unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
25292537

2530-
if (unicodeMode) {
2531-
// Annex B treats any unicode mode as the strict syntax.
2532-
annexB = false;
2533-
}
2538+
// Annex B treats any unicode mode as the strict syntax.
2539+
var anyUnicodeModeOrNonAnnexB = unicodeMode || !annexB;
25342540

25352541
/** @see {scanClassSetExpression} */
25362542
var mayContainStrings = false;
@@ -2626,7 +2632,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26262632
case CharacterCodes.exclamation:
26272633
pos++;
26282634
// In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2629-
isPreviousTermQuantifiable = annexB;
2635+
isPreviousTermQuantifiable = !anyUnicodeModeOrNonAnnexB;
26302636
break;
26312637
case CharacterCodes.lessThan:
26322638
const groupNameStart = pos;
@@ -2675,7 +2681,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26752681
const digitsStart = pos;
26762682
scanDigits();
26772683
const min = tokenValue;
2678-
if (annexB && !min) {
2684+
if (!anyUnicodeModeOrNonAnnexB && !min) {
26792685
isPreviousTermQuantifiable = true;
26802686
break;
26812687
}
@@ -2693,26 +2699,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26932699
break;
26942700
}
26952701
}
2696-
else if (max && Number.parseInt(min) > Number.parseInt(max) && (!annexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) {
2702+
else if (max && Number.parseInt(min) > Number.parseInt(max) && (anyUnicodeModeOrNonAnnexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) {
26972703
error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart);
26982704
}
26992705
}
27002706
else if (!min) {
2701-
if (!annexB) {
2707+
if (anyUnicodeModeOrNonAnnexB) {
27022708
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
27032709
}
27042710
isPreviousTermQuantifiable = true;
27052711
break;
27062712
}
27072713
if (charCodeChecked(pos) !== CharacterCodes.closeBrace) {
2708-
if (annexB) {
2709-
isPreviousTermQuantifiable = true;
2710-
break;
2711-
}
2712-
else {
2714+
if (anyUnicodeModeOrNonAnnexB) {
27132715
error(Diagnostics._0_expected, pos, 0, String.fromCharCode(CharacterCodes.closeBrace));
27142716
pos--;
27152717
}
2718+
else {
2719+
isPreviousTermQuantifiable = true;
2720+
break;
2721+
}
27162722
}
27172723
// falls through
27182724
case CharacterCodes.asterisk:
@@ -2754,7 +2760,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27542760
// Assume that everything starting from this character is outside of the regex
27552761
return;
27562762
}
2757-
if (!annexB || ch === CharacterCodes.closeParen) {
2763+
if (anyUnicodeModeOrNonAnnexB || ch === CharacterCodes.closeParen) {
27582764
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
27592765
}
27602766
pos++;
@@ -2811,10 +2817,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28112817
scanGroupName(/*isReference*/ true);
28122818
scanExpectedChar(CharacterCodes.greaterThan);
28132819
}
2814-
else {
2815-
// This is actually allowed in Annex B if there are no named capturing groups in the regex,
2816-
// but if we were going to suppress these errors, we would have to record the positions of all '\k's
2817-
// and defer the errors until after the scanning to know if the regex has any named capturing groups.
2820+
else if (namedCaptureGroups) {
28182821
error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2);
28192822
}
28202823
break;
@@ -2864,7 +2867,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28642867
pos++;
28652868
return String.fromCharCode(ch & 0x1f);
28662869
}
2867-
if (!annexB) {
2870+
if (anyUnicodeModeOrNonAnnexB) {
28682871
error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
28692872
}
28702873
else if (atomEscape) {
@@ -2900,7 +2903,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29002903
return "\\";
29012904
}
29022905
pos--;
2903-
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true);
2906+
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ anyUnicodeModeOrNonAnnexB || "annex-b");
29042907
}
29052908
}
29062909

@@ -2949,12 +2952,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29492952
if (isClassContentExit(ch)) {
29502953
return;
29512954
}
2952-
if (!minCharacter && !annexB) {
2955+
if (!minCharacter && anyUnicodeModeOrNonAnnexB) {
29532956
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart);
29542957
}
29552958
const maxStart = pos;
29562959
const maxCharacter = scanClassAtom();
2957-
if (!maxCharacter && !annexB) {
2960+
if (!maxCharacter && anyUnicodeModeOrNonAnnexB) {
29582961
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart);
29592962
continue;
29602963
}
@@ -3450,12 +3453,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
34503453
error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start);
34513454
}
34523455
}
3453-
else if (annexB) {
3454-
pos--;
3455-
return false;
3456+
else if (anyUnicodeModeOrNonAnnexB) {
3457+
error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
34563458
}
34573459
else {
3458-
error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
3460+
pos--;
3461+
return false;
34593462
}
34603463
return true;
34613464
}
@@ -3500,7 +3503,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
35003503
forEach(decimalEscapes, escape => {
35013504
// in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
35023505
// either a LegacyOctalEscapeSequence or IdentityEscape
3503-
if (!annexB && escape.value > numberOfCapturingGroups) {
3506+
if (anyUnicodeModeOrNonAnnexB && escape.value > numberOfCapturingGroups) {
35043507
if (numberOfCapturingGroups) {
35053508
error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups);
35063509
}

tests/baselines/reference/regularExpressionAnnexB.errors.txt

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
regularExpressionAnnexB.ts(2,8): error TS1125: Hexadecimal digit expected.
2-
regularExpressionAnnexB.ts(2,12): error TS1510: '\k' must be followed by a capturing group name enclosed in angle brackets.
32
regularExpressionAnnexB.ts(2,22): error TS1125: Hexadecimal digit expected.
43
regularExpressionAnnexB.ts(2,28): error TS1125: Hexadecimal digit expected.
54
regularExpressionAnnexB.ts(3,9): error TS1125: Hexadecimal digit expected.
@@ -8,7 +7,6 @@ regularExpressionAnnexB.ts(3,29): error TS1125: Hexadecimal digit expected.
87
regularExpressionAnnexB.ts(7,4): error TS1535: This character cannot be escaped in a regular expression.
98
regularExpressionAnnexB.ts(7,8): error TS1125: Hexadecimal digit expected.
109
regularExpressionAnnexB.ts(7,10): error TS1512: '\c' must be followed by an ASCII letter.
11-
regularExpressionAnnexB.ts(7,12): error TS1510: '\k' must be followed by a capturing group name enclosed in angle brackets.
1210
regularExpressionAnnexB.ts(7,14): error TS1535: This character cannot be escaped in a regular expression.
1311
regularExpressionAnnexB.ts(7,18): error TS1535: This character cannot be escaped in a regular expression.
1412
regularExpressionAnnexB.ts(7,22): error TS1125: Hexadecimal digit expected.
@@ -75,13 +73,11 @@ regularExpressionAnnexB.ts(42,5): error TS1506: Numbers out of order in quantifi
7573
regularExpressionAnnexB.ts(42,10): error TS1507: There is nothing available for repetition.
7674

7775

78-
==== regularExpressionAnnexB.ts (75 errors) ====
76+
==== regularExpressionAnnexB.ts (73 errors) ====
7977
const regexes: RegExp[] = [
8078
/\q\u\i\c\k\_\f\o\x\-\j\u\m\p\s/,
8179

8280
!!! error TS1125: Hexadecimal digit expected.
83-
~~
84-
!!! error TS1510: '\k' must be followed by a capturing group name enclosed in angle brackets.
8581

8682
!!! error TS1125: Hexadecimal digit expected.
8783

@@ -103,8 +99,6 @@ regularExpressionAnnexB.ts(42,10): error TS1507: There is nothing available for
10399
!!! error TS1125: Hexadecimal digit expected.
104100
~~
105101
!!! error TS1512: '\c' must be followed by an ASCII letter.
106-
~~
107-
!!! error TS1510: '\k' must be followed by a capturing group name enclosed in angle brackets.
108102
~~
109103
!!! error TS1535: This character cannot be escaped in a regular expression.
110104
~~

tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ regularExpressionScanning.ts(5,6): error TS1499: Unknown regular expression flag
1717
regularExpressionScanning.ts(5,7): error TS1509: This regular expression flag cannot be toggled within a subpattern.
1818
regularExpressionScanning.ts(5,10): error TS1509: This regular expression flag cannot be toggled within a subpattern.
1919
regularExpressionScanning.ts(5,11): error TS1500: Duplicate regular expression flag.
20+
regularExpressionScanning.ts(12,24): error TS1487: Octal escape sequences are not allowed. Use the syntax '\x01'.
21+
regularExpressionScanning.ts(12,26): error TS1487: Octal escape sequences are not allowed. Use the syntax '\x01'.
22+
regularExpressionScanning.ts(12,29): error TS1487: Octal escape sequences are not allowed. Use the syntax '\x53'.
23+
regularExpressionScanning.ts(12,37): error TS1487: Octal escape sequences are not allowed. Use the syntax '\x03'.
24+
regularExpressionScanning.ts(12,43): error TS1487: Octal escape sequences are not allowed. Use the syntax '\x05'.
2025
regularExpressionScanning.ts(13,9): error TS1533: This backreference refers to a group that does not exist. There are only 4 capturing groups in this regular expression.
2126
regularExpressionScanning.ts(13,24): error TS1487: Octal escape sequences are not allowed. Use the syntax '\x01'.
2227
regularExpressionScanning.ts(13,26): error TS1487: Octal escape sequences are not allowed. Use the syntax '\x01'.
@@ -202,7 +207,7 @@ regularExpressionScanning.ts(47,89): error TS1518: Anything that would possibly
202207
regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag is only available when targeting 'esnext' or later.
203208

204209

205-
==== regularExpressionScanning.ts (202 errors) ====
210+
==== regularExpressionScanning.ts (207 errors) ====
206211
const regexes: RegExp[] = [
207212
// Flags
208213
/foo/visualstudiocode,
@@ -253,6 +258,16 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag
253258
/(hi)\1/,
254259
/(hi) (hello) \2/,
255260
/\2()(\12)(foo)\1\0[\0\1\01\123](\3\03)\5\005/,
261+
~~
262+
!!! error TS1487: Octal escape sequences are not allowed. Use the syntax '\x01'.
263+
~~~
264+
!!! error TS1487: Octal escape sequences are not allowed. Use the syntax '\x01'.
265+
~~~~
266+
!!! error TS1487: Octal escape sequences are not allowed. Use the syntax '\x53'.
267+
~~~
268+
!!! error TS1487: Octal escape sequences are not allowed. Use the syntax '\x03'.
269+
~~~~
270+
!!! error TS1487: Octal escape sequences are not allowed. Use the syntax '\x05'.
256271
/\2()(\12)(foo)\1\0[\0\1\01\123](\3\03)\5\005/u,
257272
~~
258273
!!! error TS1533: This backreference refers to a group that does not exist. There are only 4 capturing groups in this regular expression.

0 commit comments

Comments
 (0)