@@ -1556,9 +1556,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
1556
1556
tokenFlags |= TokenFlags . ContainsInvalidEscape ;
1557
1557
if ( isRegularExpression || shouldEmitInvalidEscapeError ) {
1558
1558
const code = parseInt ( text . substring ( start + 1 , pos ) , 8 ) ;
1559
- if ( isRegularExpression !== "annex-b" ) {
1560
- error ( Diagnostics . Octal_escape_sequences_are_not_allowed_Use_the_syntax_0 , start , pos - start , "\\x" + code . toString ( 16 ) . padStart ( 2 , "0" ) ) ;
1561
- }
1559
+ error ( Diagnostics . Octal_escape_sequences_are_not_allowed_Use_the_syntax_0 , start , pos - start , "\\x" + code . toString ( 16 ) . padStart ( 2 , "0" ) ) ;
1562
1560
return String . fromCharCode ( code ) ;
1563
1561
}
1564
1562
return text . substring ( start , pos ) ;
@@ -2426,6 +2424,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2426
2424
// Quickly skip to the end of the regex body so that we know the flags
2427
2425
let p = tokenStart + 1 ;
2428
2426
let inEscape = false ;
2427
+ let namedCaptureGroups = false ;
2429
2428
// Although nested character classes are allowed in Unicode Sets mode,
2430
2429
// an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
2431
2430
// Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
@@ -2469,6 +2468,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2469
2468
else if ( ch === CharacterCodes . closeBracket ) {
2470
2469
inCharacterClass = false ;
2471
2470
}
2471
+ else if (
2472
+ ch === CharacterCodes . openParen
2473
+ && charCodeUnchecked ( p + 1 ) === CharacterCodes . question
2474
+ && charCodeUnchecked ( p + 2 ) === CharacterCodes . lessThan
2475
+ && charCodeUnchecked ( p + 3 ) !== CharacterCodes . equals
2476
+ && charCodeUnchecked ( p + 3 ) !== CharacterCodes . exclamation
2477
+ ) {
2478
+ namedCaptureGroups = true ;
2479
+ }
2472
2480
p ++ ;
2473
2481
}
2474
2482
const isUnterminated = ! ! ( tokenFlags & TokenFlags . Unterminated ) ;
@@ -2505,7 +2513,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2505
2513
const saveEnd = end ;
2506
2514
pos = tokenStart + 1 ;
2507
2515
end = endOfBody ;
2508
- scanRegularExpressionWorker ( regExpFlags , isUnterminated , /*annexB*/ true ) ;
2516
+ scanRegularExpressionWorker ( regExpFlags , isUnterminated , /*annexB*/ true , namedCaptureGroups ) ;
2509
2517
tokenStart = saveTokenStart ;
2510
2518
tokenFlags = saveTokenFlags ;
2511
2519
pos = savePos ;
@@ -2517,7 +2525,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2517
2525
return token ;
2518
2526
}
2519
2527
2520
- function scanRegularExpressionWorker ( regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean ) {
2528
+ function scanRegularExpressionWorker ( regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean , namedCaptureGroups : boolean ) {
2521
2529
// Why var? It avoids TDZ checks in the runtime which can be costly.
2522
2530
// See: https://github.com/microsoft/TypeScript/issues/52924
2523
2531
/* eslint-disable no-var */
@@ -2527,10 +2535,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2527
2535
/** Grammar parameter */
2528
2536
var unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
2529
2537
2530
- if ( unicodeMode ) {
2531
- // Annex B treats any unicode mode as the strict syntax.
2532
- annexB = false ;
2533
- }
2538
+ // Annex B treats any unicode mode as the strict syntax.
2539
+ var anyUnicodeModeOrNonAnnexB = unicodeMode || ! annexB ;
2534
2540
2535
2541
/** @see {scanClassSetExpression} */
2536
2542
var mayContainStrings = false ;
@@ -2626,7 +2632,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2626
2632
case CharacterCodes . exclamation :
2627
2633
pos ++ ;
2628
2634
// In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2629
- isPreviousTermQuantifiable = annexB ;
2635
+ isPreviousTermQuantifiable = ! anyUnicodeModeOrNonAnnexB ;
2630
2636
break ;
2631
2637
case CharacterCodes . lessThan :
2632
2638
const groupNameStart = pos ;
@@ -2675,7 +2681,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2675
2681
const digitsStart = pos ;
2676
2682
scanDigits ( ) ;
2677
2683
const min = tokenValue ;
2678
- if ( annexB && ! min ) {
2684
+ if ( ! anyUnicodeModeOrNonAnnexB && ! min ) {
2679
2685
isPreviousTermQuantifiable = true ;
2680
2686
break ;
2681
2687
}
@@ -2693,26 +2699,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2693
2699
break ;
2694
2700
}
2695
2701
}
2696
- else if ( max && Number . parseInt ( min ) > Number . parseInt ( max ) && ( ! annexB || text . charCodeAt ( pos ) === CharacterCodes . closeBrace ) ) {
2702
+ else if ( max && Number . parseInt ( min ) > Number . parseInt ( max ) && ( anyUnicodeModeOrNonAnnexB || text . charCodeAt ( pos ) === CharacterCodes . closeBrace ) ) {
2697
2703
error ( Diagnostics . Numbers_out_of_order_in_quantifier , digitsStart , pos - digitsStart ) ;
2698
2704
}
2699
2705
}
2700
2706
else if ( ! min ) {
2701
- if ( ! annexB ) {
2707
+ if ( anyUnicodeModeOrNonAnnexB ) {
2702
2708
error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , start , 1 , String . fromCharCode ( ch ) ) ;
2703
2709
}
2704
2710
isPreviousTermQuantifiable = true ;
2705
2711
break ;
2706
2712
}
2707
2713
if ( charCodeChecked ( pos ) !== CharacterCodes . closeBrace ) {
2708
- if ( annexB ) {
2709
- isPreviousTermQuantifiable = true ;
2710
- break ;
2711
- }
2712
- else {
2714
+ if ( anyUnicodeModeOrNonAnnexB ) {
2713
2715
error ( Diagnostics . _0_expected , pos , 0 , String . fromCharCode ( CharacterCodes . closeBrace ) ) ;
2714
2716
pos -- ;
2715
2717
}
2718
+ else {
2719
+ isPreviousTermQuantifiable = true ;
2720
+ break ;
2721
+ }
2716
2722
}
2717
2723
// falls through
2718
2724
case CharacterCodes . asterisk :
@@ -2754,7 +2760,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2754
2760
// Assume everything starting from this character is outside of the regex
2755
2761
return ;
2756
2762
}
2757
- if ( ! annexB || ch === CharacterCodes . closeParen ) {
2763
+ if ( anyUnicodeModeOrNonAnnexB || ch === CharacterCodes . closeParen ) {
2758
2764
error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , pos , 1 , String . fromCharCode ( ch ) ) ;
2759
2765
}
2760
2766
pos ++ ;
@@ -2811,10 +2817,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2811
2817
scanGroupName ( /*isReference*/ true ) ;
2812
2818
scanExpectedChar ( CharacterCodes . greaterThan ) ;
2813
2819
}
2814
- else {
2815
- // This is actually allowed in Annex B if there are no named capturing groups in the regex,
2816
- // but if we were going to suppress these errors, we would have to record the positions of all '\k's
2817
- // and defer the errors until after the scanning to know if the regex has any named capturing groups.
2820
+ else if ( namedCaptureGroups ) {
2818
2821
error ( Diagnostics . k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets , pos - 2 , 2 ) ;
2819
2822
}
2820
2823
break ;
@@ -2864,7 +2867,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2864
2867
pos ++ ;
2865
2868
return String . fromCharCode ( ch & 0x1f ) ;
2866
2869
}
2867
- if ( ! annexB ) {
2870
+ if ( anyUnicodeModeOrNonAnnexB ) {
2868
2871
error ( Diagnostics . c_must_be_followed_by_an_ASCII_letter , pos - 2 , 2 ) ;
2869
2872
}
2870
2873
else if ( atomEscape ) {
@@ -2900,7 +2903,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2900
2903
return "\\" ;
2901
2904
}
2902
2905
pos -- ;
2903
- return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
2906
+ return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ anyUnicodeModeOrNonAnnexB || "annex-b" ) ;
2904
2907
}
2905
2908
}
2906
2909
@@ -2949,12 +2952,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2949
2952
if ( isClassContentExit ( ch ) ) {
2950
2953
return ;
2951
2954
}
2952
- if ( ! minCharacter && ! annexB ) {
2955
+ if ( ! minCharacter && anyUnicodeModeOrNonAnnexB ) {
2953
2956
error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , minStart , pos - 1 - minStart ) ;
2954
2957
}
2955
2958
const maxStart = pos ;
2956
2959
const maxCharacter = scanClassAtom ( ) ;
2957
- if ( ! maxCharacter && ! annexB ) {
2960
+ if ( ! maxCharacter && anyUnicodeModeOrNonAnnexB ) {
2958
2961
error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , maxStart , pos - maxStart ) ;
2959
2962
continue ;
2960
2963
}
@@ -3450,12 +3453,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3450
3453
error ( Diagnostics . Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set , start , pos - start ) ;
3451
3454
}
3452
3455
}
3453
- else if ( annexB ) {
3454
- pos -- ;
3455
- return false ;
3456
+ else if ( anyUnicodeModeOrNonAnnexB ) {
3457
+ error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
3456
3458
}
3457
3459
else {
3458
- error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
3460
+ pos -- ;
3461
+ return false ;
3459
3462
}
3460
3463
return true ;
3461
3464
}
@@ -3500,7 +3503,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3500
3503
forEach ( decimalEscapes , escape => {
3501
3504
// in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
3502
3505
// either a LegacyOctalEscapeSequence or IdentityEscape
3503
- if ( ! annexB && escape . value > numberOfCapturingGroups ) {
3506
+ if ( anyUnicodeModeOrNonAnnexB && escape . value > numberOfCapturingGroups ) {
3504
3507
if ( numberOfCapturingGroups ) {
3505
3508
error ( Diagnostics . This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression , escape . pos , escape . end - escape . pos , numberOfCapturingGroups ) ;
3506
3509
}
0 commit comments