Scan bigger/fewer jsdoc tokens (#53081)

sandersn · web-flow · commit 137c461bd096 · 2023-03-07T16:32:04.000-08:00
diff --git a/src/compiler/parser.ts b/src/compiler/parser.ts
@@ -364,6 +364,7 @@ import {
     tracing,
     TransformFlags,
     trimString,
+    trimStringEnd,
     TryStatement,
     TupleTypeNode,
     TypeAliasDeclaration,
@@ -2165,6 +2166,10 @@ namespace Parser {
         return currentToken = scanner.scanJsDocToken();
     }
 
+    function nextJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken {
+        return currentToken = scanner.scanJSDocCommentTextToken(inBackticks);
+    }
+
     function reScanGreaterToken(): SyntaxKind {
         return currentToken = scanner.reScanGreaterToken();
     }
@@ -8602,19 +8607,14 @@ namespace Parser {
                 loop: while (true) {
                     switch (token()) {
                         case SyntaxKind.AtToken:
-                            if (state === JSDocState.BeginningOfLine || state === JSDocState.SawAsterisk) {
-                                removeTrailingWhitespace(comments);
-                                if (!commentsPos) commentsPos = getNodePos();
-                                addTag(parseTag(indent));
-                                // NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag.
-                                // Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning
-                                // for malformed examples like `/** @param {string} x @returns {number} the length */`
-                                state = JSDocState.BeginningOfLine;
-                                margin = undefined;
-                            }
-                            else {
-                                pushComment(scanner.getTokenText());
-                            }
+                            removeTrailingWhitespace(comments);
+                            if (!commentsPos) commentsPos = getNodePos();
+                            addTag(parseTag(indent));
+                            // NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag.
+                            // Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning
+                            // for malformed examples like `/** @param {string} x @returns {number} the length */`
+                            state = JSDocState.BeginningOfLine;
+                            margin = undefined;
                             break;
                         case SyntaxKind.NewLineTrivia:
                             comments.push(scanner.getTokenText());
@@ -8623,30 +8623,33 @@ namespace Parser {
                             break;
                         case SyntaxKind.AsteriskToken:
                             const asterisk = scanner.getTokenText();
-                            if (state === JSDocState.SawAsterisk || state === JSDocState.SavingComments) {
+                            if (state === JSDocState.SawAsterisk) {
                                 // If we've already seen an asterisk, then we can no longer parse a tag on this line
                                 state = JSDocState.SavingComments;
                                 pushComment(asterisk);
                             }
                             else {
+                                Debug.assert(state === JSDocState.BeginningOfLine);
                                 // Ignore the first asterisk on a line
                                 state = JSDocState.SawAsterisk;
                                 indent += asterisk.length;
                             }
                             break;
                         case SyntaxKind.WhitespaceTrivia:
+                            Debug.assert(state !== JSDocState.SavingComments, "whitespace shouldn't come from the scanner while saving top-level comment text");
                             // only collect whitespace if we're already saving comments or have just crossed the comment indent margin
                             const whitespace = scanner.getTokenText();
-                            if (state === JSDocState.SavingComments) {
-                                comments.push(whitespace);
-                            }
-                            else if (margin !== undefined && indent + whitespace.length > margin) {
+                            if (margin !== undefined && indent + whitespace.length > margin) {
                                 comments.push(whitespace.slice(margin - indent));
                             }
                             indent += whitespace.length;
                             break;
                         case SyntaxKind.EndOfFileToken:
                             break loop;
+                        case SyntaxKind.JSDocCommentTextToken:
+                            state = JSDocState.SavingComments;
+                            pushComment(scanner.getTokenValue());
+                            break;
                         case SyntaxKind.OpenBraceToken:
                             state = JSDocState.SavingComments;
                             const commentEnd = scanner.getTokenFullStart();
@@ -8671,15 +8674,20 @@ namespace Parser {
                             pushComment(scanner.getTokenText());
                             break;
                     }
-                    nextTokenJSDoc();
+                    if (state === JSDocState.SavingComments) {
+                        nextJSDocCommentTextToken(/*inBackticks*/ false);
+                    }
+                    else {
+                        nextTokenJSDoc();
+                    }
                 }
-                removeTrailingWhitespace(comments);
-                if (parts.length && comments.length) {
-                    parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? start, commentsPos));
+                const trimmedComments = trimStringEnd(comments.join(""));
+                if (parts.length && trimmedComments.length) {
+                    parts.push(finishNode(factory.createJSDocText(trimmedComments), linkEnd ?? start, commentsPos));
                 }
                 if (parts.length && tags) Debug.assertIsDefined(commentsPos, "having parsed tags implies that the end of the comment span should be set");
                 const tagsArray = tags && createNodeArray(tags, tagsPos, tagsEnd);
-                return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : comments.length ? comments.join("") : undefined, tagsArray), start, end);
+                return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : trimmedComments.length ? trimmedComments : undefined, tagsArray), start, end);
             });
 
             function removeLeadingNewlines(comments: string[]) {
@@ -8689,8 +8697,18 @@ namespace Parser {
             }
 
             function removeTrailingWhitespace(comments: string[]) {
-                while (comments.length && comments[comments.length - 1].trim() === "") {
-                    comments.pop();
+                while (comments.length) {
+                    const trimmed = trimStringEnd(comments[comments.length - 1]);
+                    if (trimmed === "") {
+                        comments.pop();
+                    }
+                    else if (trimmed.length < comments[comments.length - 1].length) {
+                        comments[comments.length - 1] = trimmed;
+                        break;
+                    }
+                    else {
+                        break;
+                    }
                 }
             }
 
@@ -8846,7 +8864,6 @@ namespace Parser {
                 const parts: JSDocComment[] = [];
                 let linkEnd;
                 let state = JSDocState.BeginningOfLine;
-                let previousWhitespace = true;
                 let margin: number | undefined;
                 function pushComment(text: string) {
                     if (!margin) {
@@ -8862,7 +8879,7 @@ namespace Parser {
                     }
                     state = JSDocState.SawAsterisk;
                 }
-                let tok = token() as JSDocSyntaxKind;
+                let tok = token() as JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken;
                 loop: while (true) {
                     switch (tok) {
                         case SyntaxKind.NewLineTrivia:
@@ -8872,29 +8889,20 @@ namespace Parser {
                             indent = 0;
                             break;
                         case SyntaxKind.AtToken:
-                            if (state === JSDocState.SavingBackticks
-                                || state === JSDocState.SavingComments && (!previousWhitespace || lookAhead(isNextJSDocTokenWhitespace))) {
-                                // @ doesn't start a new tag inside ``, and inside a comment, only after whitespace or not before whitespace
-                                comments.push(scanner.getTokenText());
-                                break;
-                            }
                             scanner.resetTokenState(scanner.getTokenEnd() - 1);
-                            // falls through
+                            break loop;
                         case SyntaxKind.EndOfFileToken:
                             // Done
                             break loop;
                         case SyntaxKind.WhitespaceTrivia:
-                            if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) {
-                                pushComment(scanner.getTokenText());
-                            }
-                            else {
-                                const whitespace = scanner.getTokenText();
-                                // if the whitespace crosses the margin, take only the whitespace that passes the margin
-                                if (margin !== undefined && indent + whitespace.length > margin) {
-                                    comments.push(whitespace.slice(margin - indent));
-                                }
-                                indent += whitespace.length;
+                            Debug.assert(state !== JSDocState.SavingComments && state !== JSDocState.SavingBackticks, "whitespace shouldn't come from the scanner while saving comment text");
+                            const whitespace = scanner.getTokenText();
+                            // if the whitespace crosses the margin, take only the whitespace that passes the margin
+                            if (margin !== undefined && indent + whitespace.length > margin) {
+                                comments.push(whitespace.slice(margin - indent));
+                                state = JSDocState.SavingComments;
                             }
+                            indent += whitespace.length;
                             break;
                         case SyntaxKind.OpenBraceToken:
                             state = JSDocState.SavingComments;
@@ -8920,6 +8928,12 @@ namespace Parser {
                             }
                             pushComment(scanner.getTokenText());
                             break;
+                        case SyntaxKind.JSDocCommentTextToken:
+                            if (state !== JSDocState.SavingBackticks) {
+                                state = JSDocState.SavingComments; // leading identifiers start recording as well
+                            }
+                            pushComment(scanner.getTokenValue());
+                            break;
                         case SyntaxKind.AsteriskToken:
                             if (state === JSDocState.BeginningOfLine) {
                                 // leading asterisks start recording on the *next* (non-whitespace) token
@@ -8936,28 +8950,27 @@ namespace Parser {
                             pushComment(scanner.getTokenText());
                             break;
                     }
-                    previousWhitespace = token() === SyntaxKind.WhitespaceTrivia;
-                    tok = nextTokenJSDoc();
+                    if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) {
+                        tok = nextJSDocCommentTextToken(state === JSDocState.SavingBackticks);
+                    }
+                    else {
+                        tok = nextTokenJSDoc();
+                    }
                 }
 
                 removeLeadingNewlines(comments);
-                removeTrailingWhitespace(comments);
+                const trimmedComments = trimStringEnd(comments.join(""));
                 if (parts.length) {
-                    if (comments.length) {
-                        parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? commentsPos));
+                    if (trimmedComments.length) {
+                        parts.push(finishNode(factory.createJSDocText(trimmedComments), linkEnd ?? commentsPos));
                     }
                     return createNodeArray(parts, commentsPos, scanner.getTokenEnd());
                 }
-                else if (comments.length) {
-                    return comments.join("");
+                else if (trimmedComments.length) {
+                    return trimmedComments;
                 }
             }
 
-            function isNextJSDocTokenWhitespace() {
-                const next = nextTokenJSDoc();
-                return next === SyntaxKind.WhitespaceTrivia || next === SyntaxKind.NewLineTrivia;
-            }
-
             function parseJSDocLink(start: number) {
                 const linkType = tryParse(parseJSDocLinkPrefix);
                 if (!linkType) {
diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts
@@ -81,6 +81,8 @@ export interface Scanner {
     reScanInvalidIdentifier(): SyntaxKind;
     scanJsxToken(): JsxTokenSyntaxKind;
     scanJsDocToken(): JSDocSyntaxKind;
+    /** @internal */
+    scanJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken;
     scan(): SyntaxKind;
 
     getText(): string;
@@ -1031,6 +1033,7 @@ export function createScanner(languageVersion: ScriptTarget,
         reScanInvalidIdentifier,
         scanJsxToken,
         scanJsDocToken,
+        scanJSDocCommentTextToken,
         scan,
         getText,
         clearCommentDirectives,
@@ -2467,6 +2470,34 @@ export function createScanner(languageVersion: ScriptTarget,
         return scanJsxAttributeValue();
     }
 
+    function scanJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken {
+        fullStartPos = tokenStart = pos;
+        tokenFlags = TokenFlags.None;
+        if (pos >= end) {
+            return token = SyntaxKind.EndOfFileToken;
+        }
+        for (let ch = text.charCodeAt(pos);
+             pos < end && (!isLineBreak(ch) && ch !== CharacterCodes.backtick);
+             ch = codePointAt(text, ++pos)) {
+            if (!inBackticks) {
+                if (ch === CharacterCodes.openBrace) {
+                    break;
+                }
+                else if (ch === CharacterCodes.at
+                    && pos - 1 >= 0 && isWhiteSpaceSingleLine(text.charCodeAt(pos - 1))
+                    && !(pos + 1 < end && isWhiteSpaceLike(text.charCodeAt(pos + 1)))) {
+                    // @ doesn't start a new tag inside ``, and elsewhere, only after whitespace and before non-whitespace
+                    break;
+                }
+            }
+        }
+        if (pos === tokenStart) {
+            return scanJsDocToken();
+        }
+        tokenValue = text.substring(tokenStart, pos);
+        return token = SyntaxKind.JSDocCommentTextToken;
+    }
+
     function scanJsDocToken(): JSDocSyntaxKind {
         fullStartPos = tokenStart = pos;
         tokenFlags = TokenFlags.None;
diff --git a/src/compiler/types.ts b/src/compiler/types.ts
@@ -129,6 +129,11 @@ export const enum SyntaxKind {
     // Identifiers and PrivateIdentifiers
     Identifier,
     PrivateIdentifier,
+    /**
+     * Only the special JSDoc comment text scanner produces JSDocCommentTextTokens. One of these tokens spans all text after a tag comment's start and before the next @
+     * @internal
+     */
+    JSDocCommentTextToken,
     // Reserved words
     BreakKeyword,
     CaseKeyword,
diff --git a/src/testRunner/unittests/jsDocParsing.ts b/src/testRunner/unittests/jsDocParsing.ts
@@ -393,6 +393,7 @@ oh.no
  * Some\n\n * text\r\n * with newlines.
  */`);
             parsesCorrectly("Chained tags, no leading whitespace", `/**@a @b @c@d*/`);
+            parsesCorrectly("Single trailing whitespace", `/** trailing whitespace */`);
             parsesCorrectly("Initial star is not a tag", `/***@a*/`);
             parsesCorrectly("Initial star space is not a tag", `/*** @a*/`);
             parsesCorrectly("Initial email address is not a tag", `/**bill@example.com*/`);
diff --git a/tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.Initial star space is not a tag.json b/tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.Initial star space is not a tag.json
@@ -5,5 +5,27 @@
     "flags": "JSDoc",
     "modifierFlagsCache": 0,
     "transformFlags": 0,
-    "comment": "* @a"
+    "comment": "*",
+    "tags": {
+        "0": {
+            "kind": "JSDocTag",
+            "pos": 5,
+            "end": 7,
+            "modifierFlagsCache": 0,
+            "transformFlags": 0,
+            "tagName": {
+                "kind": "Identifier",
+                "pos": 6,
+                "end": 7,
+                "modifierFlagsCache": 0,
+                "transformFlags": 0,
+                "escapedText": "a"
+            }
+        },
+        "length": 1,
+        "pos": 5,
+        "end": 7,
+        "hasTrailingComma": false,
+        "transformFlags": 0
+    }
 }
diff --git a/tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.Single trailing whitespace.json b/tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.Single trailing whitespace.json
@@ -0,0 +1,9 @@
+{
+    "kind": "JSDoc",
+    "pos": 0,
+    "end": 26,
+    "flags": "JSDoc",
+    "modifierFlagsCache": 0,
+    "transformFlags": 0,
+    "comment": "trailing whitespace"
+}
diff --git a/tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.asteriskAfterPreamble.json b/tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.asteriskAfterPreamble.json
@@ -5,5 +5,41 @@
     "flags": "JSDoc",
     "modifierFlagsCache": 0,
     "transformFlags": 0,
-    "comment": "* @type {number}"
+    "comment": "*",
+    "tags": {
+        "0": {
+            "kind": "JSDocTypeTag",
+            "pos": 6,
+            "end": 21,
+            "modifierFlagsCache": 0,
+            "transformFlags": 0,
+            "tagName": {
+                "kind": "Identifier",
+                "pos": 7,
+                "end": 11,
+                "modifierFlagsCache": 0,
+                "transformFlags": 0,
+                "escapedText": "type"
+            },
+            "typeExpression": {
+                "kind": "JSDocTypeExpression",
+                "pos": 12,
+                "end": 20,
+                "modifierFlagsCache": 0,
+                "transformFlags": 0,
+                "type": {
+                    "kind": "NumberKeyword",
+                    "pos": 13,
+                    "end": 19,
+                    "modifierFlagsCache": 0,
+                    "transformFlags": 1
+                }
+            }
+        },
+        "length": 1,
+        "pos": 6,
+        "end": 21,
+        "hasTrailingComma": false,
+        "transformFlags": 0
+    }
 }
diff --git a/tests/baselines/reference/api/tsserverlibrary.d.ts b/tests/baselines/reference/api/tsserverlibrary.d.ts
diff --git a/tests/baselines/reference/api/typescript.d.ts b/tests/baselines/reference/api/typescript.d.ts