Skip to content

Commit 137c461

Browse files
authored
Scan bigger/fewer jsdoc tokens (#53081)
1 parent 4af97b0 commit 137c461

9 files changed

+771
-654
lines changed

src/compiler/parser.ts

Lines changed: 69 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ import {
364364
tracing,
365365
TransformFlags,
366366
trimString,
367+
trimStringEnd,
367368
TryStatement,
368369
TupleTypeNode,
369370
TypeAliasDeclaration,
@@ -2165,6 +2166,10 @@ namespace Parser {
21652166
return currentToken = scanner.scanJsDocToken();
21662167
}
21672168

2169+
function nextJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken {
2170+
return currentToken = scanner.scanJSDocCommentTextToken(inBackticks);
2171+
}
2172+
21682173
function reScanGreaterToken(): SyntaxKind {
21692174
return currentToken = scanner.reScanGreaterToken();
21702175
}
@@ -8602,19 +8607,14 @@ namespace Parser {
86028607
loop: while (true) {
86038608
switch (token()) {
86048609
case SyntaxKind.AtToken:
8605-
if (state === JSDocState.BeginningOfLine || state === JSDocState.SawAsterisk) {
8606-
removeTrailingWhitespace(comments);
8607-
if (!commentsPos) commentsPos = getNodePos();
8608-
addTag(parseTag(indent));
8609-
// NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag.
8610-
// Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning
8611-
// for malformed examples like `/** @param {string} x @returns {number} the length */`
8612-
state = JSDocState.BeginningOfLine;
8613-
margin = undefined;
8614-
}
8615-
else {
8616-
pushComment(scanner.getTokenText());
8617-
}
8610+
removeTrailingWhitespace(comments);
8611+
if (!commentsPos) commentsPos = getNodePos();
8612+
addTag(parseTag(indent));
8613+
// NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag.
8614+
// Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning
8615+
// for malformed examples like `/** @param {string} x @returns {number} the length */`
8616+
state = JSDocState.BeginningOfLine;
8617+
margin = undefined;
86188618
break;
86198619
case SyntaxKind.NewLineTrivia:
86208620
comments.push(scanner.getTokenText());
@@ -8623,30 +8623,33 @@ namespace Parser {
86238623
break;
86248624
case SyntaxKind.AsteriskToken:
86258625
const asterisk = scanner.getTokenText();
8626-
if (state === JSDocState.SawAsterisk || state === JSDocState.SavingComments) {
8626+
if (state === JSDocState.SawAsterisk) {
86278627
// If we've already seen an asterisk, then we can no longer parse a tag on this line
86288628
state = JSDocState.SavingComments;
86298629
pushComment(asterisk);
86308630
}
86318631
else {
8632+
Debug.assert(state === JSDocState.BeginningOfLine);
86328633
// Ignore the first asterisk on a line
86338634
state = JSDocState.SawAsterisk;
86348635
indent += asterisk.length;
86358636
}
86368637
break;
86378638
case SyntaxKind.WhitespaceTrivia:
8639+
Debug.assert(state !== JSDocState.SavingComments, "whitespace shouldn't come from the scanner while saving top-level comment text");
86388640
// only collect whitespace if we have just crossed the comment indent margin (whitespace is never scanned separately while saving comments)
86398641
const whitespace = scanner.getTokenText();
8640-
if (state === JSDocState.SavingComments) {
8641-
comments.push(whitespace);
8642-
}
8643-
else if (margin !== undefined && indent + whitespace.length > margin) {
8642+
if (margin !== undefined && indent + whitespace.length > margin) {
86448643
comments.push(whitespace.slice(margin - indent));
86458644
}
86468645
indent += whitespace.length;
86478646
break;
86488647
case SyntaxKind.EndOfFileToken:
86498648
break loop;
8649+
case SyntaxKind.JSDocCommentTextToken:
8650+
state = JSDocState.SavingComments;
8651+
pushComment(scanner.getTokenValue());
8652+
break;
86508653
case SyntaxKind.OpenBraceToken:
86518654
state = JSDocState.SavingComments;
86528655
const commentEnd = scanner.getTokenFullStart();
@@ -8671,15 +8674,20 @@ namespace Parser {
86718674
pushComment(scanner.getTokenText());
86728675
break;
86738676
}
8674-
nextTokenJSDoc();
8677+
if (state === JSDocState.SavingComments) {
8678+
nextJSDocCommentTextToken(/*inBackticks*/ false);
8679+
}
8680+
else {
8681+
nextTokenJSDoc();
8682+
}
86758683
}
8676-
removeTrailingWhitespace(comments);
8677-
if (parts.length && comments.length) {
8678-
parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? start, commentsPos));
8684+
const trimmedComments = trimStringEnd(comments.join(""));
8685+
if (parts.length && trimmedComments.length) {
8686+
parts.push(finishNode(factory.createJSDocText(trimmedComments), linkEnd ?? start, commentsPos));
86798687
}
86808688
if (parts.length && tags) Debug.assertIsDefined(commentsPos, "having parsed tags implies that the end of the comment span should be set");
86818689
const tagsArray = tags && createNodeArray(tags, tagsPos, tagsEnd);
8682-
return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : comments.length ? comments.join("") : undefined, tagsArray), start, end);
8690+
return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : trimmedComments.length ? trimmedComments : undefined, tagsArray), start, end);
86838691
});
86848692

86858693
function removeLeadingNewlines(comments: string[]) {
@@ -8689,8 +8697,18 @@ namespace Parser {
86898697
}
86908698

86918699
function removeTrailingWhitespace(comments: string[]) {
8692-
while (comments.length && comments[comments.length - 1].trim() === "") {
8693-
comments.pop();
8700+
while (comments.length) {
8701+
const trimmed = trimStringEnd(comments[comments.length - 1]);
8702+
if (trimmed === "") {
8703+
comments.pop();
8704+
}
8705+
else if (trimmed.length < comments[comments.length - 1].length) {
8706+
comments[comments.length - 1] = trimmed;
8707+
break;
8708+
}
8709+
else {
8710+
break;
8711+
}
86948712
}
86958713
}
86968714

@@ -8846,7 +8864,6 @@ namespace Parser {
88468864
const parts: JSDocComment[] = [];
88478865
let linkEnd;
88488866
let state = JSDocState.BeginningOfLine;
8849-
let previousWhitespace = true;
88508867
let margin: number | undefined;
88518868
function pushComment(text: string) {
88528869
if (!margin) {
@@ -8862,7 +8879,7 @@ namespace Parser {
88628879
}
88638880
state = JSDocState.SawAsterisk;
88648881
}
8865-
let tok = token() as JSDocSyntaxKind;
8882+
let tok = token() as JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken;
88668883
loop: while (true) {
88678884
switch (tok) {
88688885
case SyntaxKind.NewLineTrivia:
@@ -8872,29 +8889,20 @@ namespace Parser {
88728889
indent = 0;
88738890
break;
88748891
case SyntaxKind.AtToken:
8875-
if (state === JSDocState.SavingBackticks
8876-
|| state === JSDocState.SavingComments && (!previousWhitespace || lookAhead(isNextJSDocTokenWhitespace))) {
8877-
// @ doesn't start a new tag inside ``, and inside a comment, only after whitespace or not before whitespace
8878-
comments.push(scanner.getTokenText());
8879-
break;
8880-
}
88818892
scanner.resetTokenState(scanner.getTokenEnd() - 1);
8882-
// falls through
8893+
break loop;
88838894
case SyntaxKind.EndOfFileToken:
88848895
// Done
88858896
break loop;
88868897
case SyntaxKind.WhitespaceTrivia:
8887-
if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) {
8888-
pushComment(scanner.getTokenText());
8889-
}
8890-
else {
8891-
const whitespace = scanner.getTokenText();
8892-
// if the whitespace crosses the margin, take only the whitespace that passes the margin
8893-
if (margin !== undefined && indent + whitespace.length > margin) {
8894-
comments.push(whitespace.slice(margin - indent));
8895-
}
8896-
indent += whitespace.length;
8898+
Debug.assert(state !== JSDocState.SavingComments && state !== JSDocState.SavingBackticks, "whitespace shouldn't come from the scanner while saving comment text");
8899+
const whitespace = scanner.getTokenText();
8900+
// if the whitespace crosses the margin, take only the whitespace that passes the margin
8901+
if (margin !== undefined && indent + whitespace.length > margin) {
8902+
comments.push(whitespace.slice(margin - indent));
8903+
state = JSDocState.SavingComments;
88978904
}
8905+
indent += whitespace.length;
88988906
break;
88998907
case SyntaxKind.OpenBraceToken:
89008908
state = JSDocState.SavingComments;
@@ -8920,6 +8928,12 @@ namespace Parser {
89208928
}
89218929
pushComment(scanner.getTokenText());
89228930
break;
8931+
case SyntaxKind.JSDocCommentTextToken:
8932+
if (state !== JSDocState.SavingBackticks) {
8933+
state = JSDocState.SavingComments; // leading identifiers start recording as well
8934+
}
8935+
pushComment(scanner.getTokenValue());
8936+
break;
89238937
case SyntaxKind.AsteriskToken:
89248938
if (state === JSDocState.BeginningOfLine) {
89258939
// leading asterisks start recording on the *next* (non-whitespace) token
@@ -8936,28 +8950,27 @@ namespace Parser {
89368950
pushComment(scanner.getTokenText());
89378951
break;
89388952
}
8939-
previousWhitespace = token() === SyntaxKind.WhitespaceTrivia;
8940-
tok = nextTokenJSDoc();
8953+
if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) {
8954+
tok = nextJSDocCommentTextToken(state === JSDocState.SavingBackticks);
8955+
}
8956+
else {
8957+
tok = nextTokenJSDoc();
8958+
}
89418959
}
89428960

89438961
removeLeadingNewlines(comments);
8944-
removeTrailingWhitespace(comments);
8962+
const trimmedComments = trimStringEnd(comments.join(""));
89458963
if (parts.length) {
8946-
if (comments.length) {
8947-
parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? commentsPos));
8964+
if (trimmedComments.length) {
8965+
parts.push(finishNode(factory.createJSDocText(trimmedComments), linkEnd ?? commentsPos));
89488966
}
89498967
return createNodeArray(parts, commentsPos, scanner.getTokenEnd());
89508968
}
8951-
else if (comments.length) {
8952-
return comments.join("");
8969+
else if (trimmedComments.length) {
8970+
return trimmedComments;
89538971
}
89548972
}
89558973

8956-
function isNextJSDocTokenWhitespace() {
8957-
const next = nextTokenJSDoc();
8958-
return next === SyntaxKind.WhitespaceTrivia || next === SyntaxKind.NewLineTrivia;
8959-
}
8960-
89618974
function parseJSDocLink(start: number) {
89628975
const linkType = tryParse(parseJSDocLinkPrefix);
89638976
if (!linkType) {

src/compiler/scanner.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ export interface Scanner {
8181
reScanInvalidIdentifier(): SyntaxKind;
8282
scanJsxToken(): JsxTokenSyntaxKind;
8383
scanJsDocToken(): JSDocSyntaxKind;
84+
/** @internal */
85+
scanJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken;
8486
scan(): SyntaxKind;
8587

8688
getText(): string;
@@ -1031,6 +1033,7 @@ export function createScanner(languageVersion: ScriptTarget,
10311033
reScanInvalidIdentifier,
10321034
scanJsxToken,
10331035
scanJsDocToken,
1036+
scanJSDocCommentTextToken,
10341037
scan,
10351038
getText,
10361039
clearCommentDirectives,
@@ -2467,6 +2470,34 @@ export function createScanner(languageVersion: ScriptTarget,
24672470
return scanJsxAttributeValue();
24682471
}
24692472

2473+
function scanJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken {
2474+
fullStartPos = tokenStart = pos;
2475+
tokenFlags = TokenFlags.None;
2476+
if (pos >= end) {
2477+
return token = SyntaxKind.EndOfFileToken;
2478+
}
2479+
for (let ch = text.charCodeAt(pos);
2480+
pos < end && (!isLineBreak(ch) && ch !== CharacterCodes.backtick);
2481+
ch = codePointAt(text, ++pos)) {
2482+
if (!inBackticks) {
2483+
if (ch === CharacterCodes.openBrace) {
2484+
break;
2485+
}
2486+
else if (ch === CharacterCodes.at
2487+
&& pos - 1 >= 0 && isWhiteSpaceSingleLine(text.charCodeAt(pos - 1))
2488+
&& !(pos + 1 < end && isWhiteSpaceLike(text.charCodeAt(pos + 1)))) {
2489+
// @ doesn't start a new tag inside ``, and elsewhere, only after whitespace and before non-whitespace
2490+
break;
2491+
}
2492+
}
2493+
}
2494+
if (pos === tokenStart) {
2495+
return scanJsDocToken();
2496+
}
2497+
tokenValue = text.substring(tokenStart, pos);
2498+
return token = SyntaxKind.JSDocCommentTextToken;
2499+
}
2500+
24702501
function scanJsDocToken(): JSDocSyntaxKind {
24712502
fullStartPos = tokenStart = pos;
24722503
tokenFlags = TokenFlags.None;

src/compiler/types.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ export const enum SyntaxKind {
129129
// Identifiers and PrivateIdentifiers
130130
Identifier,
131131
PrivateIdentifier,
132+
/**
133+
* Only the special JSDoc comment text scanner produces JSDocCommentTextTokens. One of these tokens spans all text after a tag comment's start and before the next @
134+
* @internal
135+
*/
136+
JSDocCommentTextToken,
132137
// Reserved words
133138
BreakKeyword,
134139
CaseKeyword,

src/testRunner/unittests/jsDocParsing.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ oh.no
393393
* Some\n\n * text\r\n * with newlines.
394394
*/`);
395395
parsesCorrectly("Chained tags, no leading whitespace", `/**@a @b @c@d*/`);
396+
parsesCorrectly("Single trailing whitespace", `/** trailing whitespace */`);
396397
parsesCorrectly("Initial star is not a tag", `/***@a*/`);
397398
parsesCorrectly("Initial star space is not a tag", `/*** @a*/`);
398399
parsesCorrectly("Initial email address is not a tag", `/**bill@example.com*/`);

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.Initial star space is not a tag.json

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,27 @@
55
"flags": "JSDoc",
66
"modifierFlagsCache": 0,
77
"transformFlags": 0,
8-
"comment": "* @a"
8+
"comment": "*",
9+
"tags": {
10+
"0": {
11+
"kind": "JSDocTag",
12+
"pos": 5,
13+
"end": 7,
14+
"modifierFlagsCache": 0,
15+
"transformFlags": 0,
16+
"tagName": {
17+
"kind": "Identifier",
18+
"pos": 6,
19+
"end": 7,
20+
"modifierFlagsCache": 0,
21+
"transformFlags": 0,
22+
"escapedText": "a"
23+
}
24+
},
25+
"length": 1,
26+
"pos": 5,
27+
"end": 7,
28+
"hasTrailingComma": false,
29+
"transformFlags": 0
30+
}
931
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"kind": "JSDoc",
3+
"pos": 0,
4+
"end": 26,
5+
"flags": "JSDoc",
6+
"modifierFlagsCache": 0,
7+
"transformFlags": 0,
8+
"comment": "trailing whitespace"
9+
}

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.asteriskAfterPreamble.json

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,41 @@
55
"flags": "JSDoc",
66
"modifierFlagsCache": 0,
77
"transformFlags": 0,
8-
"comment": "* @type {number}"
8+
"comment": "*",
9+
"tags": {
10+
"0": {
11+
"kind": "JSDocTypeTag",
12+
"pos": 6,
13+
"end": 21,
14+
"modifierFlagsCache": 0,
15+
"transformFlags": 0,
16+
"tagName": {
17+
"kind": "Identifier",
18+
"pos": 7,
19+
"end": 11,
20+
"modifierFlagsCache": 0,
21+
"transformFlags": 0,
22+
"escapedText": "type"
23+
},
24+
"typeExpression": {
25+
"kind": "JSDocTypeExpression",
26+
"pos": 12,
27+
"end": 20,
28+
"modifierFlagsCache": 0,
29+
"transformFlags": 0,
30+
"type": {
31+
"kind": "NumberKeyword",
32+
"pos": 13,
33+
"end": 19,
34+
"modifierFlagsCache": 0,
35+
"transformFlags": 1
36+
}
37+
}
38+
},
39+
"length": 1,
40+
"pos": 6,
41+
"end": 21,
42+
"hasTrailingComma": false,
43+
"transformFlags": 0
44+
}
945
}

0 commit comments

Comments
 (0)