From ee80947612d93105f2251005ac1d0772e232223a Mon Sep 17 00:00:00 2001
From: Nathan Shively-Sanders <293473+sandersn@users.noreply.github.com>
Date: Thu, 12 Dec 2024 15:32:30 -0800
Subject: [PATCH 1/4] More regex error recovery in reScanSlashToken

This still does NOT include the ability to log diagnostics from
https://github.com/microsoft/TypeScript/pull/55600. But it does match
tsc's smarter regex-end detection for unterminated regexes (which you
get a lot when parsing JSX as JS).
---
 internal/scanner/scanner.go | 57 +++++++++++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 6 deletions(-)

diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go
index bdf5429b9b..e381997e13 100644
--- a/internal/scanner/scanner.go
+++ b/internal/scanner/scanner.go
@@ -805,6 +805,7 @@ func (s *Scanner) ReScanTemplateToken(isTaggedTemplate bool) ast.Kind {
 func (s *Scanner) ReScanSlashToken() ast.Kind {
 	if s.token == ast.KindSlashToken || s.token == ast.KindSlashEqualsToken {
 		s.pos = s.tokenStart + 1
+		startOfRegExpBody := s.pos
 		inEscape := false
 		inCharacterClass := false
 	loop:
@@ -824,7 +825,6 @@ func (s *Scanner) ReScanSlashToken() ast.Kind {
 			case ch == '/' && !inCharacterClass:
 				// A slash within a character class is permissible,
 				// but in general it signals the end of the regexp literal.
-				s.pos++
 				break loop
 			case ch == '[':
 				inCharacterClass = true
@@ -835,12 +835,57 @@ func (s *Scanner) ReScanSlashToken() ast.Kind {
 			}
 			s.pos += size
 		}
-		for {
-			ch, size := s.charAndSize()
-			if size == 0 || !isIdentifierPart(ch, s.languageVersion) {
-				break
+		if s.tokenFlags&ast.TokenFlagsUnterminated != 0 {
+			// Search for the nearest unbalanced bracket for better recovery. Since the expression is
+			// invalid anyways, we take nested square brackets into consideration for the best guess.
+			endOfRegExpBody := s.pos
+			s.pos = startOfRegExpBody
+			inEscape = false
+			characterClassDepth := 0
+			inDecimalQuantifier := false
+			groupDepth := 0
+			for s.pos < endOfRegExpBody {
+				ch, size := s.charAndSize()
+				if inEscape {
+					inEscape = false
+				} else if ch == '\\' {
+					inEscape = true
+				} else if ch == '[' {
+					characterClassDepth++
+				} else if ch == ']' && characterClassDepth != 0 {
+					characterClassDepth--
+				} else if !(characterClassDepth != 0) {
+					if ch == '{' {
+						inDecimalQuantifier = true
+					} else if ch == '}' && inDecimalQuantifier {
+						inDecimalQuantifier = false
+					} else if !inDecimalQuantifier {
+						if ch == '(' {
+							groupDepth++
+						} else if ch == ')' && groupDepth != 0 {
+							groupDepth--
+						} else if ch == ')' || ch == ']' || ch == '}' {
+							// We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex.
+							break
+						}
+					}
+				}
+				s.pos += size
+			}
+			// Whitespaces and semicolons at the end are not likely to be part of the regex
+			for stringutil.IsWhiteSpaceLike(s.charAt(-1)) || s.charAt(-1) == ';' {
+				s.pos--
+			}
+		} else {
+			// Consume the slash character
+			s.pos++
+			for {
+				ch, size := s.charAndSize()
+				if size == 0 || !isIdentifierPart(ch, s.languageVersion) {
+					break
+				}
+				s.pos += size
 			}
-			s.pos += size
 		}
 		s.tokenValue = s.text[s.tokenStart:s.pos]
 		s.token = ast.KindRegularExpressionLiteral

From c5e94125cb00adb957c7fba5b7c97bc025ba7540 Mon Sep 17 00:00:00 2001
From: Nathan Shively-Sanders <293473+sandersn@users.noreply.github.com>
Date: Fri, 13 Dec 2024 14:26:00 -0800
Subject: [PATCH 2/4] account for 2-byte whitespace

---
 internal/scanner/scanner.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go
index e381997e13..623ea44dd5 100644
--- a/internal/scanner/scanner.go
+++ b/internal/scanner/scanner.go
@@ -873,8 +873,13 @@ func (s *Scanner) ReScanSlashToken() ast.Kind {
 				s.pos += size
 			}
 			// Whitespaces and semicolons at the end are not likely to be part of the regex
-			for stringutil.IsWhiteSpaceLike(s.charAt(-1)) || s.charAt(-1) == ';' {
-				s.pos--
+			for {
+				ch, size := utf8.DecodeLastRuneInString(s.text[:s.pos])
+				if stringutil.IsWhiteSpaceLike(ch) || ch == ';' {
+					s.pos -= size
+				} else {
+					break
+				}
 			}
 		} else {
 			// Consume the slash character

From a7b3c84f4c095b1fc40134b178d351c1294d9ea8 Mon Sep 17 00:00:00 2001
From: Nathan Shively-Sanders <293473+sandersn@users.noreply.github.com>
Date: Mon, 16 Dec 2024 09:46:15 -0800
Subject: [PATCH 3/4] Address PR comments

1. Flip bad ts-to-go conversion of truthiness check.
2. Re-add simple "Unterminated regular expression literal" error.
---
 internal/scanner/scanner.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go
index 623ea44dd5..9ce0ea475a 100644
--- a/internal/scanner/scanner.go
+++ b/internal/scanner/scanner.go
@@ -854,7 +854,7 @@ func (s *Scanner) ReScanSlashToken() ast.Kind {
 					characterClassDepth++
 				} else if ch == ']' && characterClassDepth != 0 {
 					characterClassDepth--
-				} else if !(characterClassDepth != 0) {
+				} else if characterClassDepth == 0 {
 					if ch == '{' {
 						inDecimalQuantifier = true
 					} else if ch == '}' && inDecimalQuantifier {
@@ -881,6 +881,7 @@ func (s *Scanner) ReScanSlashToken() ast.Kind {
 					break
 				}
 			}
+			s.errorAt(diagnostics.Unterminated_regular_expression_literal, s.tokenStart, s.pos - s.tokenStart);
 		} else {
 			// Consume the slash character
 			s.pos++

From 4e003d3fad03679de1039ee59a8834f50ac140a3 Mon Sep 17 00:00:00 2001
From: Nathan Shively-Sanders <293473+sandersn@users.noreply.github.com>
Date: Mon, 16 Dec 2024 13:15:20 -0800
Subject: [PATCH 4/4] hereby format

---
 internal/scanner/scanner.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go
index 9ce0ea475a..9a7a074b53 100644
--- a/internal/scanner/scanner.go
+++ b/internal/scanner/scanner.go
@@ -881,7 +881,7 @@ func (s *Scanner) ReScanSlashToken() ast.Kind {
 					break
 				}
 			}
-			s.errorAt(diagnostics.Unterminated_regular_expression_literal, s.tokenStart, s.pos - s.tokenStart);
+			s.errorAt(diagnostics.Unterminated_regular_expression_literal, s.tokenStart, s.pos-s.tokenStart)
 		} else {
 			// Consume the slash character
 			s.pos++