@@ -21,7 +21,6 @@ import {
2121 KeywordSyntaxKind ,
2222 LanguageFeatureMinimumTarget ,
2323 LanguageVariant ,
24- lastOrUndefined ,
2524 LineAndCharacter ,
2625 MapLike ,
2726 parsePseudoBigInt ,
@@ -1614,7 +1613,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
16141613 isRegularExpression && shouldEmitInvalidEscapeError && escapedValue >= 0xD800 && escapedValue <= 0xDBFF &&
16151614 pos + 6 < end && text . substring ( pos , pos + 2 ) === "\\u" && charCodeUnchecked ( pos + 2 ) !== CharacterCodes . openBrace
16161615 ) {
1617- // For regular expressions in Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character
1616+ // For regular expressions in any Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character
16181617 // for the purpose of determining whether a character class range is out of order
16191618 // https://tc39.es/ecma262/#prod-RegExpUnicodeEscapeSequence
16201619 const nextStart = pos ;
@@ -2429,7 +2428,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24292428 pos = startOfRegExpBody ;
24302429 let inEscape = false ;
24312430 // Although nested character classes are allowed in Unicode Sets mode,
2432- // an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
2431+ // an unescaped slash is nevertheless invalid even in a character class in any Unicode mode.
24332432 // Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
24342433 // as unterminated, consuming characters beyond the slash. (This even applies to `/[[]/v`,
24352434 // which should be parsed as a well-terminated regex with an incomplete character class.)
@@ -2438,13 +2437,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24382437 while ( true ) {
24392438 // If we reach the end of a file, or hit a newline, then this is an unterminated
24402439 // regex. Report error and return what we have so far.
2441- if ( pos >= end ) {
2442- tokenFlags |= TokenFlags . Unterminated ;
2443- break ;
2444- }
2445-
2446- const ch = charCodeUnchecked ( pos ) ;
2447- if ( isLineBreak ( ch ) ) {
2440+ const ch = charCodeChecked ( pos ) ;
2441+ if ( ch === CharacterCodes . EOF || isLineBreak ( ch ) ) {
24482442 tokenFlags |= TokenFlags . Unterminated ;
24492443 break ;
24502444 }
@@ -2477,7 +2471,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24772471 pos = startOfRegExpBody ;
24782472 inEscape = false ;
24792473 let characterClassDepth = 0 ;
2480- const bracketStack : CharacterCodes [ ] = [ ] ;
2474+ let inDecimalQuantifier = false ;
2475+ let groupDepth = 0 ;
24812476 while ( pos < endOfRegExpBody ) {
24822477 const ch = charCodeUnchecked ( pos ) ;
24832478 if ( inEscape ) {
@@ -2493,18 +2488,23 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24932488 characterClassDepth -- ;
24942489 }
24952490 else if ( ! characterClassDepth ) {
2496- if ( ch === CharacterCodes . openParen ) {
2497- bracketStack . push ( CharacterCodes . closeParen ) ;
2491+ if ( ch === CharacterCodes . openBrace ) {
2492+ inDecimalQuantifier = true ;
24982493 }
2499- else if ( ch === CharacterCodes . openBrace ) {
2500- bracketStack . push ( CharacterCodes . closeBrace ) ;
2494+ else if ( ch === CharacterCodes . closeBrace && inDecimalQuantifier ) {
2495+ inDecimalQuantifier = false ;
25012496 }
2502- else if ( ch === lastOrUndefined ( bracketStack ) ) {
2503- bracketStack . pop ( ) ;
2504- }
2505- else if ( ch === CharacterCodes . closeParen || ch === CharacterCodes . closeBracket || ch === CharacterCodes . closeBrace ) {
2506- // We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex.
2507- break ;
2497+ else if ( ! inDecimalQuantifier ) {
2498+ if ( ch === CharacterCodes . openParen ) {
2499+ groupDepth ++ ;
2500+ }
2501+ else if ( ch === CharacterCodes . closeParen && groupDepth ) {
2502+ groupDepth -- ;
2503+ }
2504+ else if ( ch === CharacterCodes . closeParen || ch === CharacterCodes . closeBracket || ch === CharacterCodes . closeBrace ) {
2505+ // We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex.
2506+ break ;
2507+ }
25082508 }
25092509 }
25102510 pos ++ ;
@@ -2517,9 +2517,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25172517 // Consume the slash character
25182518 pos ++ ;
25192519 let regExpFlags = RegularExpressionFlags . None ;
2520- while ( pos < end ) {
2521- const ch = codePointUnchecked ( pos ) ;
2522- if ( ! isIdentifierPart ( ch , languageVersion ) ) {
2520+ while ( true ) {
2521+ const ch = codePointChecked ( pos ) ;
2522+ if ( ch === CharacterCodes . EOF || ! isIdentifierPart ( ch , languageVersion ) ) {
25232523 break ;
25242524 }
25252525 if ( reportErrors ) {
@@ -2530,7 +2530,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25302530 else if ( regExpFlags & flag ) {
25312531 error ( Diagnostics . Duplicate_regular_expression_flag , pos , 1 ) ;
25322532 }
2533- else if ( ( ( regExpFlags | flag ) & RegularExpressionFlags . UnicodeMode ) === RegularExpressionFlags . UnicodeMode ) {
2533+ else if ( ( ( regExpFlags | flag ) & RegularExpressionFlags . AnyUnicodeMode ) === RegularExpressionFlags . AnyUnicodeMode ) {
25342534 error ( Diagnostics . The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously , pos , 1 ) ;
25352535 }
25362536 else {
@@ -2560,9 +2560,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25602560 /** Grammar parameter */
25612561 var unicodeSetsMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeSets ) ;
25622562 /** Grammar parameter */
2563- var unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
2563+ var anyUnicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . AnyUnicodeMode ) ;
25642564
2565- if ( unicodeMode ) {
2565+ if ( anyUnicodeMode ) {
25662566 // Annex B treats any unicode mode as the strict syntax.
25672567 annexB = false ;
25682568 }
@@ -2719,7 +2719,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27192719 error ( Diagnostics . Incomplete_quantifier_Digit_expected , digitsStart , 0 ) ;
27202720 }
27212721 else {
2722- if ( unicodeMode ) {
2722+ if ( anyUnicodeMode ) {
27232723 error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , start , 1 , String . fromCharCode ( ch ) ) ;
27242724 }
27252725 isPreviousTermQuantifiable = true ;
@@ -2731,7 +2731,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27312731 }
27322732 }
27332733 else if ( ! min ) {
2734- if ( unicodeMode ) {
2734+ if ( anyUnicodeMode ) {
27352735 error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , start , 1 , String . fromCharCode ( ch ) ) ;
27362736 }
27372737 isPreviousTermQuantifiable = true ;
@@ -2775,7 +2775,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27752775 // falls through
27762776 case CharacterCodes . closeBracket :
27772777 case CharacterCodes . closeBrace :
2778- if ( unicodeMode || ch === CharacterCodes . closeParen ) {
2778+ if ( anyUnicodeMode || ch === CharacterCodes . closeParen ) {
27792779 error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , pos , 1 , String . fromCharCode ( ch ) ) ;
27802780 }
27812781 pos ++ ;
@@ -2832,7 +2832,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28322832 scanGroupName ( /*isReference*/ true ) ;
28332833 scanExpectedChar ( CharacterCodes . greaterThan ) ;
28342834 }
2835- else if ( unicodeMode ) {
2835+ else if ( anyUnicodeMode ) {
28362836 error ( Diagnostics . k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets , pos - 2 , 2 ) ;
28372837 }
28382838 break ;
@@ -2875,14 +2875,17 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28752875 Debug . assertEqual ( charCodeUnchecked ( pos - 1 ) , CharacterCodes . backslash ) ;
28762876 let ch = charCodeChecked ( pos ) ;
28772877 switch ( ch ) {
2878+ case CharacterCodes . EOF :
2879+ error ( Diagnostics . Undetermined_character_escape , pos - 1 , 1 ) ;
2880+ return "\\" ;
28782881 case CharacterCodes . c :
28792882 pos ++ ;
28802883 ch = charCodeChecked ( pos ) ;
28812884 if ( isASCIILetter ( ch ) ) {
28822885 pos ++ ;
28832886 return String . fromCharCode ( ch & 0x1f ) ;
28842887 }
2885- if ( unicodeMode ) {
2888+ if ( anyUnicodeMode ) {
28862889 error ( Diagnostics . c_must_be_followed_by_an_ASCII_letter , pos - 2 , 2 ) ;
28872890 }
28882891 else if ( atomEscape && annexB ) {
@@ -2913,12 +2916,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29132916 pos ++ ;
29142917 return String . fromCharCode ( ch ) ;
29152918 default :
2916- if ( pos >= end ) {
2917- error ( Diagnostics . Undetermined_character_escape , pos - 1 , 1 ) ;
2918- return "\\" ;
2919- }
29202919 pos -- ;
2921- return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
2920+ return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ anyUnicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
29222921 }
29232922 }
29242923
@@ -3464,11 +3463,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
34643463 }
34653464 }
34663465 scanExpectedChar ( CharacterCodes . closeBrace ) ;
3467- if ( ! unicodeMode ) {
3466+ if ( ! anyUnicodeMode ) {
34683467 error ( Diagnostics . Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set , start , pos - start ) ;
34693468 }
34703469 }
3471- else if ( unicodeMode ) {
3470+ else if ( anyUnicodeMode ) {
34723471 error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
34733472 }
34743473 return true ;
@@ -3490,7 +3489,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
34903489 }
34913490
// scanSourceCharacter: consumes one source character starting at `pos` and returns its text.
// - When the Unicode-mode flag is off, exactly one UTF-16 code unit is consumed (size is fixed at 1).
// - When it is on, the width comes from charSize(charCodeChecked(pos)).
//   NOTE(review): presumably this is 2 for a surrogate pair and 0 at end of input — confirm against charSize.
// `pos` is advanced by that width as a side effect; an empty string is returned when the width is not positive.
// Diff note: the only change in this hunk is the flag read here, `unicodeMode` (removed line) ->
// `anyUnicodeMode` (added line).
34923491 function scanSourceCharacter ( ) : string {
3493- const size = unicodeMode ? charSize ( charCodeChecked ( pos ) ) : 1 ;
3492+ const size = anyUnicodeMode ? charSize ( charCodeChecked ( pos ) ) : 1 ;
34943493 pos += size ;
// `pos` has already moved past the character, so the consumed text is the `size` units just before it.
34953494 return size > 0 ? text . substring ( pos - size , pos ) : "" ;
34963495 }
0 commit comments