@@ -1556,9 +1556,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15561556 tokenFlags |= TokenFlags . ContainsInvalidEscape ;
15571557 if ( isRegularExpression || shouldEmitInvalidEscapeError ) {
15581558 const code = parseInt ( text . substring ( start + 1 , pos ) , 8 ) ;
1559- if ( isRegularExpression !== "annex-b" ) {
1560- error ( Diagnostics . Octal_escape_sequences_are_not_allowed_Use_the_syntax_0 , start , pos - start , "\\x" + code . toString ( 16 ) . padStart ( 2 , "0" ) ) ;
1561- }
1559+ error ( Diagnostics . Octal_escape_sequences_are_not_allowed_Use_the_syntax_0 , start , pos - start , "\\x" + code . toString ( 16 ) . padStart ( 2 , "0" ) ) ;
15621560 return String . fromCharCode ( code ) ;
15631561 }
15641562 return text . substring ( start , pos ) ;
@@ -2426,6 +2424,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24262424 // Quickly get to the end of regex such that we know the flags
24272425 let p = tokenStart + 1 ;
24282426 let inEscape = false ;
2427+ let namedCaptureGroups = false ;
24292428 // Although nested character classes are allowed in Unicode Sets mode,
24302429 // an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
24312430 // Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
@@ -2469,6 +2468,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24692468 else if ( ch === CharacterCodes . closeBracket ) {
24702469 inCharacterClass = false ;
24712470 }
2471+ else if (
2472+ ch === CharacterCodes . openParen
2473+ && charCodeUnchecked ( p + 1 ) === CharacterCodes . question
2474+ && charCodeUnchecked ( p + 2 ) === CharacterCodes . lessThan
2475+ && charCodeUnchecked ( p + 3 ) !== CharacterCodes . equals
2476+ && charCodeUnchecked ( p + 3 ) !== CharacterCodes . exclamation
2477+ ) {
2478+ namedCaptureGroups = true ;
2479+ }
24722480 p ++ ;
24732481 }
24742482 const isUnterminated = ! ! ( tokenFlags & TokenFlags . Unterminated ) ;
@@ -2505,7 +2513,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25052513 const saveEnd = end ;
25062514 pos = tokenStart + 1 ;
25072515 end = endOfBody ;
2508- scanRegularExpressionWorker ( regExpFlags , isUnterminated , /*annexB*/ true ) ;
2516+ scanRegularExpressionWorker ( regExpFlags , isUnterminated , /*annexB*/ true , namedCaptureGroups ) ;
25092517 tokenStart = saveTokenStart ;
25102518 tokenFlags = saveTokenFlags ;
25112519 pos = savePos ;
@@ -2517,7 +2525,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25172525 return token ;
25182526 }
25192527
2520- function scanRegularExpressionWorker ( regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean ) {
2528+ function scanRegularExpressionWorker ( regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean , namedCaptureGroups : boolean ) {
25212529 // Why var? It avoids TDZ checks in the runtime which can be costly.
25222530 // See: https://github.com/microsoft/TypeScript/issues/52924
25232531 /* eslint-disable no-var */
@@ -2527,10 +2535,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25272535 /** Grammar parameter */
25282536 var unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
25292537
2530- if ( unicodeMode ) {
2531- // Annex B treats any unicode mode as the strict syntax.
2532- annexB = false ;
2533- }
2538+ // Annex B treats any unicode mode as the strict syntax.
2539+ var anyUnicodeModeOrNonAnnexB = unicodeMode || ! annexB ;
25342540
25352541 /** @see {scanClassSetExpression} */
25362542 var mayContainStrings = false ;
@@ -2626,7 +2632,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26262632 case CharacterCodes . exclamation :
26272633 pos ++ ;
26282634 // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2629- isPreviousTermQuantifiable = annexB ;
2635+ isPreviousTermQuantifiable = ! anyUnicodeModeOrNonAnnexB ;
26302636 break ;
26312637 case CharacterCodes . lessThan :
26322638 const groupNameStart = pos ;
@@ -2675,7 +2681,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26752681 const digitsStart = pos ;
26762682 scanDigits ( ) ;
26772683 const min = tokenValue ;
2678- if ( annexB && ! min ) {
2684+ if ( ! anyUnicodeModeOrNonAnnexB && ! min ) {
26792685 isPreviousTermQuantifiable = true ;
26802686 break ;
26812687 }
@@ -2693,26 +2699,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26932699 break ;
26942700 }
26952701 }
2696- else if ( max && Number . parseInt ( min ) > Number . parseInt ( max ) && ( ! annexB || text . charCodeAt ( pos ) === CharacterCodes . closeBrace ) ) {
2702+ else if ( max && Number . parseInt ( min ) > Number . parseInt ( max ) && ( anyUnicodeModeOrNonAnnexB || text . charCodeAt ( pos ) === CharacterCodes . closeBrace ) ) {
26972703 error ( Diagnostics . Numbers_out_of_order_in_quantifier , digitsStart , pos - digitsStart ) ;
26982704 }
26992705 }
27002706 else if ( ! min ) {
2701- if ( ! annexB ) {
2707+ if ( anyUnicodeModeOrNonAnnexB ) {
27022708 error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , start , 1 , String . fromCharCode ( ch ) ) ;
27032709 }
27042710 isPreviousTermQuantifiable = true ;
27052711 break ;
27062712 }
27072713 if ( charCodeChecked ( pos ) !== CharacterCodes . closeBrace ) {
2708- if ( annexB ) {
2709- isPreviousTermQuantifiable = true ;
2710- break ;
2711- }
2712- else {
2714+ if ( anyUnicodeModeOrNonAnnexB ) {
27132715 error ( Diagnostics . _0_expected , pos , 0 , String . fromCharCode ( CharacterCodes . closeBrace ) ) ;
27142716 pos -- ;
27152717 }
2718+ else {
2719+ isPreviousTermQuantifiable = true ;
2720+ break ;
2721+ }
27162722 }
27172723 // falls through
27182724 case CharacterCodes . asterisk :
@@ -2754,7 +2760,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27542760 // Assume everything starting from this character is outside of the regex
27552761 return ;
27562762 }
2757- if ( ! annexB || ch === CharacterCodes . closeParen ) {
2763+ if ( anyUnicodeModeOrNonAnnexB || ch === CharacterCodes . closeParen ) {
27582764 error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , pos , 1 , String . fromCharCode ( ch ) ) ;
27592765 }
27602766 pos ++ ;
@@ -2811,10 +2817,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28112817 scanGroupName ( /*isReference*/ true ) ;
28122818 scanExpectedChar ( CharacterCodes . greaterThan ) ;
28132819 }
2814- else {
2815- // This is actually allowed in Annex B if there are no named capturing groups in the regex,
2816- // but if we were going to suppress these errors, we would have to record the positions of all '\k's
2817- // and defer the errors until after the scanning to know if the regex has any named capturing groups.
2820+ else if ( namedCaptureGroups ) {
28182821 error ( Diagnostics . k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets , pos - 2 , 2 ) ;
28192822 }
28202823 break ;
@@ -2864,7 +2867,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28642867 pos ++ ;
28652868 return String . fromCharCode ( ch & 0x1f ) ;
28662869 }
2867- if ( ! annexB ) {
2870+ if ( anyUnicodeModeOrNonAnnexB ) {
28682871 error ( Diagnostics . c_must_be_followed_by_an_ASCII_letter , pos - 2 , 2 ) ;
28692872 }
28702873 else if ( atomEscape ) {
@@ -2900,7 +2903,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29002903 return "\\" ;
29012904 }
29022905 pos -- ;
2903- return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
2906+ return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ anyUnicodeModeOrNonAnnexB || "annex-b" ) ;
29042907 }
29052908 }
29062909
@@ -2949,12 +2952,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29492952 if ( isClassContentExit ( ch ) ) {
29502953 return ;
29512954 }
2952- if ( ! minCharacter && ! annexB ) {
2955+ if ( ! minCharacter && anyUnicodeModeOrNonAnnexB ) {
29532956 error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , minStart , pos - 1 - minStart ) ;
29542957 }
29552958 const maxStart = pos ;
29562959 const maxCharacter = scanClassAtom ( ) ;
2957- if ( ! maxCharacter && ! annexB ) {
2960+ if ( ! maxCharacter && anyUnicodeModeOrNonAnnexB ) {
29582961 error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , maxStart , pos - maxStart ) ;
29592962 continue ;
29602963 }
@@ -3450,12 +3453,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
34503453 error ( Diagnostics . Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set , start , pos - start ) ;
34513454 }
34523455 }
3453- else if ( annexB ) {
3454- pos -- ;
3455- return false ;
3456+ else if ( anyUnicodeModeOrNonAnnexB ) {
3457+ error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
34563458 }
34573459 else {
3458- error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
3460+ pos -- ;
3461+ return false ;
34593462 }
34603463 return true ;
34613464 }
@@ -3500,7 +3503,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
35003503 forEach ( decimalEscapes , escape => {
35013504 // in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
35023505 // either a LegacyOctalEscapeSequence or IdentityEscape
3503- if ( ! annexB && escape . value > numberOfCapturingGroups ) {
3506+ if ( anyUnicodeModeOrNonAnnexB && escape . value > numberOfCapturingGroups ) {
35043507 if ( numberOfCapturingGroups ) {
35053508 error ( Diagnostics . This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression , escape . pos , escape . end - escape . pos , numberOfCapturingGroups ) ;
35063509 }
0 commit comments