From dffd88ee51447d45b373a64f4434ab140e433c2a Mon Sep 17 00:00:00 2001 From: Rintaro Ishizaki Date: Tue, 9 Dec 2025 12:17:27 -0800 Subject: [PATCH] [Parser] Eliminate 'CommentRetentionMode::None' in Lexer Lexer should always set `Token.CommentLength` correctly because it necessary for restoring to the token position with comments. Otherwise, 'Token::isAtStartOfLine()' might not correctly set. --- include/swift/Parse/Lexer.h | 3 +- include/swift/Parse/Parser.h | 4 +++ lib/IDE/Formatting.cpp | 10 +++---- lib/Parse/Lexer.cpp | 4 +-- lib/Parse/ParseDecl.cpp | 2 +- lib/Parse/ParseGeneric.cpp | 2 +- lib/Parse/Parser.cpp | 34 +++++++++------------- lib/Refactoring/Async/AsyncConverter.cpp | 4 +-- lib/Sema/MiscDiagnostics.cpp | 9 +++--- test/Parse/backgtrack-startofline.swift | 7 +++++ unittests/Parse/LexerTests.cpp | 36 +++--------------------- 11 files changed, 45 insertions(+), 70 deletions(-) create mode 100644 test/Parse/backgtrack-startofline.swift diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h index 5390a059892d7..d845ba0d1932d 100644 --- a/include/swift/Parse/Lexer.h +++ b/include/swift/Parse/Lexer.h @@ -39,7 +39,6 @@ namespace swift { template struct Diag; enum class CommentRetentionMode { - None, AttachToNextToken, ReturnAsTokens, }; @@ -185,7 +184,7 @@ class Lexer { const LangOptions &Options, const SourceManager &SourceMgr, unsigned BufferID, DiagnosticEngine *Diags, LexerMode LexMode, HashbangMode HashbangAllowed = HashbangMode::Disallowed, - CommentRetentionMode RetainComments = CommentRetentionMode::None); + CommentRetentionMode RetainComments = CommentRetentionMode::AttachToNextToken); /// Create a lexer that scans a subrange of the source buffer. Lexer(const LangOptions &Options, const SourceManager &SourceMgr, diff --git a/include/swift/Parse/Parser.h b/include/swift/Parse/Parser.h index 9877dfddc848a..c06cdc402a9e8 100644 --- a/include/swift/Parse/Parser.h +++ b/include/swift/Parse/Parser.h @@ -693,6 +693,10 @@ class Parser { return Context.LangOpts.EnableExperimentalConcurrency; } + bool shouldAttachCommentToDecl() { + return Context.LangOpts.AttachCommentsToDecls; + } + /// Returns true if a Swift declaration starts after the current token, /// otherwise returns false. bool isNextStartOfSwiftDecl() { diff --git a/lib/IDE/Formatting.cpp b/lib/IDE/Formatting.cpp index cbe1d9ca7422a..364c990da08e5 100644 --- a/lib/IDE/Formatting.cpp +++ b/lib/IDE/Formatting.cpp @@ -49,7 +49,7 @@ static void widenOrSet(SourceRange &First, SourceRange Second) { static bool isFirstTokenOnLine(SourceManager &SM, SourceLoc Loc) { assert(Loc.isValid()); SourceLoc LineStart = Lexer::getLocForStartOfLine(SM, Loc); - CommentRetentionMode SkipComments = CommentRetentionMode::None; + CommentRetentionMode SkipComments = CommentRetentionMode::AttachToNextToken; Token First = Lexer::getTokenAtLocation(SM, LineStart, SkipComments); return First.getLoc() == Loc; } @@ -76,8 +76,8 @@ static std::optional getTokenAfter(SourceManager &SM, SourceLoc Loc, bool SkipComments = true) { assert(Loc.isValid()); CommentRetentionMode Mode = SkipComments - ? CommentRetentionMode::None - : CommentRetentionMode::ReturnAsTokens; + ? CommentRetentionMode::AttachToNextToken + : CommentRetentionMode::ReturnAsTokens; assert(Lexer::getTokenAtLocation(SM, Loc, Mode).getLoc() == Loc); SourceLoc End = Lexer::getLocForEndOfToken(SM, Loc); Token Next = Lexer::getTokenAtLocation(SM, End, Mode); @@ -814,8 +814,8 @@ class OutdentChecker: protected RangeWalker { } else if (R.isValid()) { // Check condition 2a. SourceLoc LineStart = Lexer::getLocForStartOfLine(SM, R); - Token First = Lexer::getTokenAtLocation(SM, LineStart, - CommentRetentionMode::None); + Token First = Lexer::getTokenAtLocation( + SM, LineStart, CommentRetentionMode::AttachToNextToken); IsOutdenting |= First.getLoc() == R; } diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index d9f3c70ffddc9..ca0cb2dd8de84 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -270,7 +270,7 @@ Token Lexer::getTokenAt(SourceLoc Loc) { "location from the wrong buffer"); Lexer L(LangOpts, SourceMgr, BufferID, getUnderlyingDiags(), LexMode, - HashbangMode::Allowed, CommentRetentionMode::None); + HashbangMode::Allowed, CommentRetentionMode::AttachToNextToken); L.restoreState(State(Loc)); return L.peekNextToken(); } @@ -3029,7 +3029,7 @@ static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM, LangOptions FakeLangOptions; Lexer L(FakeLangOptions, SM, BufferID, nullptr, LexerMode::Swift, - HashbangMode::Allowed, CommentRetentionMode::None, + HashbangMode::Allowed, CommentRetentionMode::AttachToNextToken, BufferStart, BufferEnd); // Lex tokens until we find the token that contains the source location. diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index f80bee63975e0..190d52c256328 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -6204,7 +6204,7 @@ ParserStatus Parser::parseDecl(bool IsAtStartOfLineOrPreviousHadSemi, // Parse attributes. DeclAttributes Attributes; - if (Tok.hasComment()) + if (Tok.hasComment() && shouldAttachCommentToDecl()) Attributes.add(new (Context) RawDocCommentAttr(Tok.getCommentRange())); ParserStatus AttrStatus = parseDeclAttributeList( Attributes, IfConfigsAreDeclAttrs); diff --git a/lib/Parse/ParseGeneric.cpp b/lib/Parse/ParseGeneric.cpp index ea0cfa8d9e9f5..e2450965022d9 100644 --- a/lib/Parse/ParseGeneric.cpp +++ b/lib/Parse/ParseGeneric.cpp @@ -56,7 +56,7 @@ Parser::parseGenericParametersBeforeWhere(SourceLoc LAngleLoc, // Parse attributes. DeclAttributes attributes; - if (Tok.hasComment()) + if (Tok.hasComment() && shouldAttachCommentToDecl()) attributes.add(new (Context) RawDocCommentAttr(Tok.getCommentRange())); parseDeclAttributeList(attributes); diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp index 92b392131b44d..4b7c67f982542 100644 --- a/lib/Parse/Parser.cpp +++ b/lib/Parse/Parser.cpp @@ -341,21 +341,15 @@ Parser::Parser(unsigned BufferID, SourceFile &SF, SILParserStateBase *SIL, PersistentParserState *PersistentState) : Parser(BufferID, SF, &SF.getASTContext().Diags, SIL, PersistentState) {} -Parser::Parser(unsigned BufferID, SourceFile &SF, DiagnosticEngine* LexerDiags, - SILParserStateBase *SIL, - PersistentParserState *PersistentState) - : Parser( - std::unique_ptr(new Lexer( - SF.getASTContext().LangOpts, SF.getASTContext().SourceMgr, - BufferID, LexerDiags, - sourceFileKindToLexerMode(SF.Kind), - SF.Kind == SourceFileKind::Main - ? HashbangMode::Allowed - : HashbangMode::Disallowed, - SF.getASTContext().LangOpts.AttachCommentsToDecls - ? CommentRetentionMode::AttachToNextToken - : CommentRetentionMode::None)), - SF, SIL, PersistentState) {} +Parser::Parser(unsigned BufferID, SourceFile &SF, DiagnosticEngine *LexerDiags, + SILParserStateBase *SIL, PersistentParserState *PersistentState) + : Parser(std::unique_ptr(new Lexer( + SF.getASTContext().LangOpts, SF.getASTContext().SourceMgr, + BufferID, LexerDiags, sourceFileKindToLexerMode(SF.Kind), + SF.Kind == SourceFileKind::Main ? HashbangMode::Allowed + : HashbangMode::Disallowed, + CommentRetentionMode::AttachToNextToken)), + SF, SIL, PersistentState) {} namespace { @@ -1243,12 +1237,10 @@ ParserUnit::ParserUnit(SourceManager &SM, SourceFileKind SFKind, : Impl(*new Implementation(SM, SFKind, BufferID, LangOptions(), "input")) { std::unique_ptr Lex; - Lex.reset(new Lexer(Impl.LangOpts, SM, - BufferID, &Impl.Diags, - LexerMode::Swift, - HashbangMode::Allowed, - CommentRetentionMode::None, - Offset, EndOffset)); + Lex.reset(new Lexer(Impl.LangOpts, SM, BufferID, &Impl.Diags, + LexerMode::Swift, HashbangMode::Allowed, + CommentRetentionMode::AttachToNextToken, Offset, + EndOffset)); Impl.TheParser.reset(new Parser(std::move(Lex), *Impl.SF, /*SIL=*/nullptr, /*PersistentState=*/nullptr)); } diff --git a/lib/Refactoring/Async/AsyncConverter.cpp b/lib/Refactoring/Async/AsyncConverter.cpp index 1b4bc8708b79f..156ee48444be0 100644 --- a/lib/Refactoring/Async/AsyncConverter.cpp +++ b/lib/Refactoring/Async/AsyncConverter.cpp @@ -651,8 +651,8 @@ void AsyncConverter::addFuncDecl(const FuncDecl *FD) { LeftEndLoc = Lexer::getLocForEndOfToken( SM, Params->get(TopHandler.Index - 1)->getEndLoc()); // Skip to the end of any comments - Token Next = - Lexer::getTokenAtLocation(SM, LeftEndLoc, CommentRetentionMode::None); + Token Next = Lexer::getTokenAtLocation( + SM, LeftEndLoc, CommentRetentionMode::AttachToNextToken); if (Next.getKind() != tok::NUM_TOKENS) LeftEndLoc = Next.getLoc(); break; diff --git a/lib/Sema/MiscDiagnostics.cpp b/lib/Sema/MiscDiagnostics.cpp index 0710fddd3aabe..81ce8ab549a86 100644 --- a/lib/Sema/MiscDiagnostics.cpp +++ b/lib/Sema/MiscDiagnostics.cpp @@ -2559,8 +2559,9 @@ class ImplicitSelfUsageChecker : public BaseDiagnosticWalker { // insert 'self,'. If it wasn't a valid entry, then we will at least not // be introducing any new errors/warnings... const auto locAfterBracket = brackets.Start.getAdvancedLoc(1); - const auto nextAfterBracket = Lexer::getTokenAtLocation( - Ctx.SourceMgr, locAfterBracket, CommentRetentionMode::None); + const auto nextAfterBracket = + Lexer::getTokenAtLocation(Ctx.SourceMgr, locAfterBracket, + CommentRetentionMode::AttachToNextToken); if (nextAfterBracket.getLoc() != brackets.End) diag.fixItInsertAfter(brackets.Start, "self, "); else @@ -2581,8 +2582,8 @@ class ImplicitSelfUsageChecker : public BaseDiagnosticWalker { // opening brace of the closure, we may need to pad the fix-it // with a space. const auto nextLoc = closureExpr->getLoc().getAdvancedLoc(1); - const auto next = Lexer::getTokenAtLocation(Ctx.SourceMgr, nextLoc, - CommentRetentionMode::None); + const auto next = Lexer::getTokenAtLocation( + Ctx.SourceMgr, nextLoc, CommentRetentionMode::AttachToNextToken); std::string trailing = next.getLoc() == nextLoc ? " " : ""; diag.fixItInsertAfter(closureExpr->getLoc(), " [self] in" + trailing); diff --git a/test/Parse/backgtrack-startofline.swift b/test/Parse/backgtrack-startofline.swift new file mode 100644 index 0000000000000..d59a4b9639a14 --- /dev/null +++ b/test/Parse/backgtrack-startofline.swift @@ -0,0 +1,7 @@ +// RUN: %target-typecheck-verify-swift -verify-ignore-unrelated + +func test() { + _ = "test" + /* comment */ #if true + #endif +} diff --git a/unittests/Parse/LexerTests.cpp b/unittests/Parse/LexerTests.cpp index 82b9d276c7f82..f016dc020949c 100644 --- a/unittests/Parse/LexerTests.cpp +++ b/unittests/Parse/LexerTests.cpp @@ -271,32 +271,6 @@ TEST_F(LexerTest, ContentStartTokenIsStartOfLineUTF8BOM) { ASSERT_TRUE(Tok.isAtStartOfLine()); } -TEST_F(LexerTest, BOMNoCommentNoTrivia) { - const char *Source = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */"; - - LangOptions LangOpts; - SourceManager SourceMgr; - unsigned BufferID = SourceMgr.addMemBufferCopy(StringRef(Source)); - - Lexer L(LangOpts, SourceMgr, BufferID, /*Diags=*/nullptr, LexerMode::Swift, - HashbangMode::Disallowed, CommentRetentionMode::None); - - Token Tok; - - L.lex(Tok); - ASSERT_EQ(tok::identifier, Tok.getKind()); - ASSERT_EQ("aaa", Tok.getText()); - ASSERT_EQ(SourceMgr.getLocForOffset(BufferID, 14), Tok.getLoc()); - ASSERT_EQ(SourceMgr.getLocForOffset(BufferID, 14), Tok.getCommentRange().getStart()); - ASSERT_EQ(0u, Tok.getCommentRange().getByteLength()); - - L.lex(Tok); - ASSERT_EQ(tok::eof, Tok.getKind()); - ASSERT_EQ(SourceMgr.getLocForOffset(BufferID, 31), Tok.getLoc()); - ASSERT_EQ(SourceMgr.getLocForOffset(BufferID, 31), Tok.getCommentRange().getStart()); - ASSERT_EQ(0u, Tok.getCommentRange().getByteLength()); -} - TEST_F(LexerTest, BOMTokenCommentNoTrivia) { const char *Source = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */"; @@ -718,9 +692,8 @@ TEST_F(LexerTest, DiagnoseEmbeddedNul) { DiagnosticEngine Diags(SourceMgr); Diags.addConsumer(DiagConsumer); - Lexer L(LangOpts, SourceMgr, BufferID, &Diags, - LexerMode::Swift, HashbangMode::Disallowed, - CommentRetentionMode::None); + Lexer L(LangOpts, SourceMgr, BufferID, &Diags, LexerMode::Swift, + HashbangMode::Disallowed, CommentRetentionMode::AttachToNextToken); Token Tok; L.lex(Tok); @@ -743,9 +716,8 @@ TEST_F(LexerTest, DiagnoseEmbeddedNulOffset) { DiagnosticEngine Diags(SourceMgr); Diags.addConsumer(DiagConsumer); - Lexer L(LangOpts, SourceMgr, BufferID, &Diags, - LexerMode::Swift, HashbangMode::Disallowed, - CommentRetentionMode::None, + Lexer L(LangOpts, SourceMgr, BufferID, &Diags, LexerMode::Swift, + HashbangMode::Disallowed, CommentRetentionMode::AttachToNextToken, /*Offset=*/5, /*EndOffset=*/SourceLen); ASSERT_FALSE(containsPrefix(