From 72b43bd2f392a009187e1cdd90627691a4017707 Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Wed, 18 Sep 2024 09:02:23 -0700 Subject: [PATCH 1/6] [C++20][Modules] Fix crash when function and lambda inside loaded from different modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Because AST loading code is lazy and happens in unpredictable order, it is possible that a function and lambda inside the function can be loaded from different modules. As a result, the captured DeclRefExpr won’t match the corresponding VarDecl inside the function. This situation is reflected in the AST as follows: ``` FunctionDecl 0x555564f4aff0 line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected ()' inline |-also in ./folly-conv.h `-CompoundStmt 0x555564f7cfc8 |-DeclStmt 0x555564f7ced8 | `-VarDecl 0x555564f7cef8 col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit | `-IntegerLiteral 0x555564f7d080 'int' 0 |-CallExpr 0x555564f7cea8 '' | |-UnresolvedLookupExpr 0x555564f7bea0 '' lvalue (no ADL) = 'then_' 0x555564f7bef0 | |-CXXTemporaryObjectExpr 0x555564f7bcb0 'Expected':'folly::Expected' 'void () noexcept' zeroing | `-LambdaExpr 0x555564f7bc88 '(lambda at Conv.h:39:48)' | |-CXXRecordDecl 0x555564f76b88 col:48 imported in ./folly-conv.h hidden implicit class definition | | |-also in ./thrift_cpp2_base.h | | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init | | |-DefaultConstructor defaulted_is_constexpr | | |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveConstructor exists simple trivial needs_implicit | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveAssignment | | `-Destructor simple irrelevant trivial constexpr needs_implicit | `-CompoundStmt 0x555564f7d1a8 | `-ReturnStmt 0x555564f7d198 | `-DeclRefExpr 0x555564f7d0a0 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture `-ReturnStmt 0x555564f7bc78 `-InitListExpr 0x555564f7bc38 'void' ``` This diff modifies the AST deserialization process to load lambdas within the canonical function declaration sooner, immediately following the function, ensuring that they are loaded from the same module. Re-land https://github.com/llvm/llvm-project/pull/104512 Added test case that caused crash due to multiple enclosed lambdas deserialization. Test Plan: check-clang --- clang/include/clang/Serialization/ASTReader.h | 9 +++ clang/lib/Serialization/ASTReader.cpp | 14 +++- clang/lib/Serialization/ASTReaderDecl.cpp | 10 +++ clang/lib/Serialization/ASTWriterDecl.cpp | 42 ++++++++++ ...rash-instantiated-in-scope-cxx-modules.cpp | 76 +++++++++++++++++++ ...ash-instantiated-in-scope-cxx-modules2.cpp | 30 ++++++++ ...ash-instantiated-in-scope-cxx-modules3.cpp | 26 +++++++ 7 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 898f4392465fd..1b3812e7fd452 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1188,6 +1188,15 @@ class ASTReader /// once recursing loading has been completed. llvm::SmallVector PendingOdrMergeChecks; + /// Lambdas that need to be loaded immediately after the function they belong + /// to. It is necessary to have a canonical declaration for the lambda class + /// from the same module as the enclosing function. This requirement ensures + /// the correct resolution of captured variables in the lambda. Without this, + /// due to lazy deserialization, canonical declarations for the function and + /// lambdas can be from different modules, and DeclRefExprs may refer to AST + /// nodes that do not exist in the function. + SmallVector PendingLambdas; + using DataPointers = std::pair; using ObjCInterfaceDataPointers = diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 7efcc81e194d9..d7bad6bbad0c9 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -9777,7 +9777,8 @@ void ASTReader::finishPendingActions() { !PendingDeducedVarTypes.empty() || !PendingIncompleteDeclChains.empty() || !PendingDeclChains.empty() || !PendingMacroIDs.empty() || !PendingDeclContextInfos.empty() || !PendingUpdateRecords.empty() || - !PendingObjCExtensionIvarRedeclarations.empty()) { + !PendingObjCExtensionIvarRedeclarations.empty() || + !PendingLambdas.empty()) { // If any identifiers with corresponding top-level declarations have // been loaded, load those declarations now. using TopLevelDeclsMap = @@ -9922,6 +9923,17 @@ void ASTReader::finishPendingActions() { } PendingObjCExtensionIvarRedeclarations.pop_back(); } + + // Load any pending lambdas. During the deserialization of pending lambdas, + // more lambdas can be discovered, so swap the current PendingLambdas with a + // local empty vector. Newly discovered lambdas will be deserialized in the + // next iteration. + if (!PendingLambdas.empty()) { + SmallVector DeclIDs; + DeclIDs.swap(PendingLambdas); + for (auto ID : DeclIDs) + GetDecl(ID); + } } // At this point, all update records for loaded decls are in place, so any diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 9272e23c7da3f..20e577404d997 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1155,6 +1155,16 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); FD->setParams(Reader.getContext(), Params); + + // For the first decl add all lambdas inside for loading them later, + // otherwise skip them. + unsigned NumLambdas = Record.readInt(); + if (FD->isFirstDecl()) { + for (unsigned I = 0; I != NumLambdas; ++I) + Reader.PendingLambdas.push_back(Record.readDeclID()); + } else { + Record.skipInts(NumLambdas); + } } void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) { diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 555f6325da646..732a6e21f340d 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -18,6 +18,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/PrettyDeclStackTrace.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/SourceManager.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTRecordWriter.h" @@ -625,6 +626,33 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { : QualType()); } +static llvm::SmallVector collectLambdas(FunctionDecl *D) { + struct LambdaCollector : public ConstStmtVisitor { + llvm::SmallVectorImpl &Lambdas; + + LambdaCollector(llvm::SmallVectorImpl &Lambdas) + : Lambdas(Lambdas) {} + + void VisitLambdaExpr(const LambdaExpr *E) { + VisitStmt(E); + Lambdas.push_back(E->getLambdaClass()); + } + + void VisitStmt(const Stmt *S) { + if (!S) + return; + for (const Stmt *Child : S->children()) + if (Child) + Visit(Child); + } + }; + + llvm::SmallVector Lambdas; + if (D->hasBody()) + LambdaCollector(Lambdas).VisitStmt(D->getBody()); + return Lambdas; +} + void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { static_assert(DeclContext::NumFunctionDeclBits == 44, "You need to update the serializer after you change the " @@ -764,6 +792,19 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { Record.push_back(D->param_size()); for (auto *P : D->parameters()) Record.AddDeclRef(P); + + // Store references to all lambda decls inside function to load them + // immediately after loading the function to make sure that canonical + // decls for lambdas will be from the same module. + if (D->isCanonicalDecl()) { + llvm::SmallVector Lambdas = collectLambdas(D); + Record.push_back(Lambdas.size()); + for (const auto *L : Lambdas) + Record.AddDeclRef(L); + } else { + Record.push_back(0); + } + Code = serialization::DECL_FUNCTION; } @@ -2239,6 +2280,7 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) { // // This is: // NumParams and Params[] from FunctionDecl, and + // NumLambdas, Lambdas[] from FunctionDecl, and // NumOverriddenMethods, OverriddenMethods[] from CXXMethodDecl. // // Add an AbbrevOp for 'size then elements' and use it here. diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp new file mode 100644 index 0000000000000..80844a58ad825 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp @@ -0,0 +1,76 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h + +//--- Conv.h +#pragma once + +template +_Up __declval(int); + +template +auto declval() noexcept -> decltype(__declval<_Tp>(0)); + +namespace folly { + +template +struct Expected { + template + auto thenOrThrow() -> decltype(declval()) { + return 1; + } +}; + +struct ExpectedHelper { + template + static constexpr Expected return_(T) { + return Expected(); + } + + template + static auto then_(This&&, Fn&&) + -> decltype(T::template return_((declval()(true), 0))) { + return Expected(); + } +}; + +template +inline Expected tryTo() { + Tgt result = 0; + // In build with asserts: + // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa(D) && "declaration not instantiated in this scope"' failed. + // In release build compilation error on the line below inside lambda: + // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized] + ExpectedHelper::then_(Expected(), [&](bool) { return result; }); + return {}; +} + +} // namespace folly + +inline void bar() { + folly::tryTo(); +} +// expected-no-diagnostics + +//--- folly-conv.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- thrift_cpp2_base.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- logger_base.h +#pragma once +import "folly-conv.h"; +import "thrift_cpp2_base.h"; + +inline void foo() { + folly::tryTo(); +} +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp new file mode 100644 index 0000000000000..5b1a904e928a6 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp @@ -0,0 +1,30 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h +// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp + +//--- header.h +template +void f(T) {} + +class A { + virtual ~A(); +}; + +inline A::~A() { + f([](){}); +} + +struct B { + void g() { + f([](){ + [](){}; + }); + } +}; +// expected-no-diagnostics + +//--- main.cpp +import "header.h"; +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp new file mode 100644 index 0000000000000..646ff9f745710 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t +// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -fsyntax-only -verify + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// No crash or assertion failure on multiple nested lambdas deserialization. +template +void b() { + [] { + []{ + []{ + []{ + []{ + }(); + }(); + }(); + }(); + }(); +} + +void foo() { + b(); +} +#endif From 4d95a90a956dba62e04ed168d7f3dc9ed73959ef Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Thu, 19 Sep 2024 08:11:37 -0700 Subject: [PATCH 2/6] Instead of StmtVisitor use DeclContext visitor for lambda collection --- clang/lib/Serialization/ASTWriterDecl.cpp | 33 ++++++++--------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 732a6e21f340d..8f62b1a907ca8 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -626,30 +626,19 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { : QualType()); } -static llvm::SmallVector collectLambdas(FunctionDecl *D) { - struct LambdaCollector : public ConstStmtVisitor { - llvm::SmallVectorImpl &Lambdas; - - LambdaCollector(llvm::SmallVectorImpl &Lambdas) - : Lambdas(Lambdas) {} - - void VisitLambdaExpr(const LambdaExpr *E) { - VisitStmt(E); - Lambdas.push_back(E->getLambdaClass()); - } - - void VisitStmt(const Stmt *S) { - if (!S) - return; - for (const Stmt *Child : S->children()) - if (Child) - Visit(Child); +// Recursively collects all lambda declarations within the function declaration. +static llvm::SmallVector collectLambdas(FunctionDecl *FD) { + llvm::SmallVector Lambdas; + std::function visitor = [&visitor, + &Lambdas](DeclContext *DC) { + for (Decl *D : DC->decls()) { + if (isa(D)) + visitor(cast(D)); + if (auto *RD = dyn_cast(D); RD && RD->isLambda()) + Lambdas.push_back(RD); } }; - - llvm::SmallVector Lambdas; - if (D->hasBody()) - LambdaCollector(Lambdas).VisitStmt(D->getBody()); + visitor(FD); return Lambdas; } From 60fd20129304e184be69599c3e4f85ddc779ae29 Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Thu, 19 Sep 2024 08:46:15 -0700 Subject: [PATCH 3/6] To be on the safe side, assume that decls may be null --- clang/lib/Serialization/ASTWriterDecl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 8f62b1a907ca8..36dad89edcb1c 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -632,6 +632,8 @@ static llvm::SmallVector collectLambdas(FunctionDecl *FD) { std::function visitor = [&visitor, &Lambdas](DeclContext *DC) { for (Decl *D : DC->decls()) { + if (!D) + continue; if (isa(D)) visitor(cast(D)); if (auto *RD = dyn_cast(D); RD && RD->isLambda()) From 48937a9d452d5c8aa054571daff9c9f605e73fd1 Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Fri, 20 Sep 2024 09:34:23 -0700 Subject: [PATCH 4/6] Re-implement related lambda deseralization with own AST record --- .../include/clang/Serialization/ASTBitCodes.h | 3 ++ clang/include/clang/Serialization/ASTReader.h | 12 ++---- clang/include/clang/Serialization/ASTWriter.h | 3 ++ clang/lib/Serialization/ASTReader.cpp | 23 +++++------ clang/lib/Serialization/ASTReaderDecl.cpp | 18 ++++----- clang/lib/Serialization/ASTWriter.cpp | 19 ++++++++++ clang/lib/Serialization/ASTWriterDecl.cpp | 38 +++---------------- 7 files changed, 51 insertions(+), 65 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 4410df296d8ef..42cde55ba77ed 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -724,6 +724,9 @@ enum ASTRecordTypes { /// Record code for vtables to emit. VTABLES_TO_EMIT = 70, + + /// Record code for the FunctionDecl to lambdas mapping. + FUNCTION_DECL_TO_LAMBDAS_MAP = 71, }; /// Record types used within a source manager block. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 1b3812e7fd452..6005000a4b534 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -532,6 +532,9 @@ class ASTReader /// namespace as if it is not delayed. DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap; + /// Mapping from FunctionDecl IDs to the corresponding lambda IDs. + llvm::DenseMap> FunctionToLambdasMap; + struct PendingUpdateRecord { Decl *D; GlobalDeclID ID; @@ -1188,15 +1191,6 @@ class ASTReader /// once recursing loading has been completed. llvm::SmallVector PendingOdrMergeChecks; - /// Lambdas that need to be loaded immediately after the function they belong - /// to. It is necessary to have a canonical declaration for the lambda class - /// from the same module as the enclosing function. This requirement ensures - /// the correct resolution of captured variables in the lambda. Without this, - /// due to lazy deserialization, canonical declarations for the function and - /// lambdas can be from different modules, and DeclRefExprs may refer to AST - /// nodes that do not exist in the function. - SmallVector PendingLambdas; - using DataPointers = std::pair; using ObjCInterfaceDataPointers = diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 10a50b711043a..d196989dfafe1 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -233,6 +233,9 @@ class ASTWriter : public ASTDeserializationListener, /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`. llvm::SmallPtrSet PredefinedDecls; + /// Map that provides the ID of function to the vector of lambdas inside it. + llvm::DenseMap> FunctionToLambdasMap; + /// Offset of each declaration in the bitstream, indexed by /// the declaration's ID. std::vector DeclOffsets; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index d7bad6bbad0c9..00e0c5c3fc1d0 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3856,6 +3856,15 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; } + case FUNCTION_DECL_TO_LAMBDAS_MAP: + for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) { + GlobalDeclID ID = ReadDeclID(F, Record, I); + auto& Lambdas = FunctionToLambdasMap[ID]; + for (unsigned II = 0, NN = Record[I++]; II < NN; II++) + Lambdas.push_back(ReadDeclID(F, Record, I)); + } + break; + case OBJC_CATEGORIES_MAP: if (F.LocalNumObjCCategoriesInMap != 0) return llvm::createStringError( @@ -9777,8 +9786,7 @@ void ASTReader::finishPendingActions() { !PendingDeducedVarTypes.empty() || !PendingIncompleteDeclChains.empty() || !PendingDeclChains.empty() || !PendingMacroIDs.empty() || !PendingDeclContextInfos.empty() || !PendingUpdateRecords.empty() || - !PendingObjCExtensionIvarRedeclarations.empty() || - !PendingLambdas.empty()) { + !PendingObjCExtensionIvarRedeclarations.empty()) { // If any identifiers with corresponding top-level declarations have // been loaded, load those declarations now. using TopLevelDeclsMap = @@ -9923,17 +9931,6 @@ void ASTReader::finishPendingActions() { } PendingObjCExtensionIvarRedeclarations.pop_back(); } - - // Load any pending lambdas. During the deserialization of pending lambdas, - // more lambdas can be discovered, so swap the current PendingLambdas with a - // local empty vector. Newly discovered lambdas will be deserialized in the - // next iteration. - if (!PendingLambdas.empty()) { - SmallVector DeclIDs; - DeclIDs.swap(PendingLambdas); - for (auto ID : DeclIDs) - GetDecl(ID); - } } // At this point, all update records for loaded decls are in place, so any diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 20e577404d997..1f7fc40fb2eed 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1155,16 +1155,6 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); FD->setParams(Reader.getContext(), Params); - - // For the first decl add all lambdas inside for loading them later, - // otherwise skip them. - unsigned NumLambdas = Record.readInt(); - if (FD->isFirstDecl()) { - for (unsigned I = 0; I != NumLambdas; ++I) - Reader.PendingLambdas.push_back(Record.readDeclID()); - } else { - Record.skipInts(NumLambdas); - } } void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) { @@ -4361,6 +4351,14 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod)); DC->setHasExternalVisibleStorage(true); } + + // Load any pending lambdas for the function. + if (auto *FD = dyn_cast(D); FD && FD->isCanonicalDecl()) { + if (auto IT = FunctionToLambdasMap.find(ID); IT != FunctionToLambdasMap.end()) { + for (auto LID : IT->second) + GetDecl(LID); + } + } } void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 008bf571f847d..8d60921d11e5d 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -903,6 +903,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(PENDING_IMPLICIT_INSTANTIATIONS); RECORD(UPDATE_VISIBLE); RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD); + RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP); RECORD(DECL_UPDATE_OFFSETS); RECORD(DECL_UPDATES); RECORD(CUDA_SPECIAL_DECL_REFS); @@ -5706,6 +5707,24 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD, DelayedNamespaceRecord); + if (!FunctionToLambdasMap.empty()) { + RecordData FunctionToLambdasMapRecord; + for (const auto &Pair : FunctionToLambdasMap) { + FunctionToLambdasMapRecord.push_back(GetDeclRef(Pair.first).getRawValue()); + FunctionToLambdasMapRecord.push_back(Pair.second.size()); + for (const auto &Lambda : Pair.second) + FunctionToLambdasMapRecord.push_back(Lambda.getRawValue()); + } + + auto Abv = std::make_shared(); + Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); + unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv)); + Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord, + FunctionToLambdaMapAbbrev); + } + const TranslationUnitDecl *TU = Context.getTranslationUnitDecl(); // Create a lexical update block containing all of the declarations in the // translation unit that do not come from other AST files. diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 36dad89edcb1c..50c090b195d61 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -18,7 +18,6 @@ #include "clang/AST/Expr.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/PrettyDeclStackTrace.h" -#include "clang/AST/StmtVisitor.h" #include "clang/Basic/SourceManager.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTRecordWriter.h" @@ -626,24 +625,6 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { : QualType()); } -// Recursively collects all lambda declarations within the function declaration. -static llvm::SmallVector collectLambdas(FunctionDecl *FD) { - llvm::SmallVector Lambdas; - std::function visitor = [&visitor, - &Lambdas](DeclContext *DC) { - for (Decl *D : DC->decls()) { - if (!D) - continue; - if (isa(D)) - visitor(cast(D)); - if (auto *RD = dyn_cast(D); RD && RD->isLambda()) - Lambdas.push_back(RD); - } - }; - visitor(FD); - return Lambdas; -} - void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { static_assert(DeclContext::NumFunctionDeclBits == 44, "You need to update the serializer after you change the " @@ -783,19 +764,6 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { Record.push_back(D->param_size()); for (auto *P : D->parameters()) Record.AddDeclRef(P); - - // Store references to all lambda decls inside function to load them - // immediately after loading the function to make sure that canonical - // decls for lambdas will be from the same module. - if (D->isCanonicalDecl()) { - llvm::SmallVector Lambdas = collectLambdas(D); - Record.push_back(Lambdas.size()); - for (const auto *L : Lambdas) - Record.AddDeclRef(L); - } else { - Record.push_back(0); - } - Code = serialization::DECL_FUNCTION; } @@ -1553,6 +1521,11 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { } else { Record.push_back(0); } + // For lambdas inside canonical FunctionDecl remember the mapping. + if (auto FD = llvm::dyn_cast_or_null(D->getDeclContext()); + FD && FD->isCanonicalDecl()) { + Writer.FunctionToLambdasMap[FD].push_back(Writer.GetDeclRef(D)); + } } else { Record.push_back(CXXRecNotTemplate); } @@ -2271,7 +2244,6 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) { // // This is: // NumParams and Params[] from FunctionDecl, and - // NumLambdas, Lambdas[] from FunctionDecl, and // NumOverriddenMethods, OverriddenMethods[] from CXXMethodDecl. // // Add an AbbrevOp for 'size then elements' and use it here. From 4a853b492bac396c450925a3fce13d246d60f27e Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Fri, 20 Sep 2024 09:38:20 -0700 Subject: [PATCH 5/6] Fix clang-format issues --- clang/include/clang/Serialization/ASTReader.h | 3 ++- clang/include/clang/Serialization/ASTWriter.h | 3 ++- clang/lib/Serialization/ASTReader.cpp | 2 +- clang/lib/Serialization/ASTReaderDecl.cpp | 3 ++- clang/lib/Serialization/ASTWriter.cpp | 3 ++- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 6005000a4b534..757cac51b5171 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -533,7 +533,8 @@ class ASTReader DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap; /// Mapping from FunctionDecl IDs to the corresponding lambda IDs. - llvm::DenseMap> FunctionToLambdasMap; + llvm::DenseMap> + FunctionToLambdasMap; struct PendingUpdateRecord { Decl *D; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index d196989dfafe1..9dc01e75892f9 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -234,7 +234,8 @@ class ASTWriter : public ASTDeserializationListener, llvm::SmallPtrSet PredefinedDecls; /// Map that provides the ID of function to the vector of lambdas inside it. - llvm::DenseMap> FunctionToLambdasMap; + llvm::DenseMap> + FunctionToLambdasMap; /// Offset of each declaration in the bitstream, indexed by /// the declaration's ID. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 00e0c5c3fc1d0..e68eef99591ca 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3859,7 +3859,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, case FUNCTION_DECL_TO_LAMBDAS_MAP: for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) { GlobalDeclID ID = ReadDeclID(F, Record, I); - auto& Lambdas = FunctionToLambdasMap[ID]; + auto &Lambdas = FunctionToLambdasMap[ID]; for (unsigned II = 0, NN = Record[I++]; II < NN; II++) Lambdas.push_back(ReadDeclID(F, Record, I)); } diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 1f7fc40fb2eed..47e389201451d 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -4354,7 +4354,8 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { // Load any pending lambdas for the function. if (auto *FD = dyn_cast(D); FD && FD->isCanonicalDecl()) { - if (auto IT = FunctionToLambdasMap.find(ID); IT != FunctionToLambdasMap.end()) { + if (auto IT = FunctionToLambdasMap.find(ID); + IT != FunctionToLambdasMap.end()) { for (auto LID : IT->second) GetDecl(LID); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 8d60921d11e5d..c2952df3a0370 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5710,7 +5710,8 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { if (!FunctionToLambdasMap.empty()) { RecordData FunctionToLambdasMapRecord; for (const auto &Pair : FunctionToLambdasMap) { - FunctionToLambdasMapRecord.push_back(GetDeclRef(Pair.first).getRawValue()); + FunctionToLambdasMapRecord.push_back( + GetDeclRef(Pair.first).getRawValue()); FunctionToLambdasMapRecord.push_back(Pair.second.size()); for (const auto &Lambda : Pair.second) FunctionToLambdasMapRecord.push_back(Lambda.getRawValue()); From 1a8cf47804f7a231fb94f17e9a1868ce6ebca76d Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Tue, 24 Sep 2024 04:28:52 -0700 Subject: [PATCH 6/6] Add more comments --- clang/include/clang/Serialization/ASTBitCodes.h | 5 ++++- clang/include/clang/Serialization/ASTReader.h | 8 ++++++++ clang/include/clang/Serialization/ASTWriter.h | 6 +++++- clang/lib/Serialization/ASTReader.cpp | 4 +++- clang/lib/Serialization/ASTReaderDecl.cpp | 1 + clang/lib/Serialization/ASTWriter.cpp | 2 ++ 6 files changed, 23 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 42cde55ba77ed..5be33ae0ed1b9 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -725,7 +725,10 @@ enum ASTRecordTypes { /// Record code for vtables to emit. VTABLES_TO_EMIT = 70, - /// Record code for the FunctionDecl to lambdas mapping. + /// Record code for the FunctionDecl to lambdas mapping. These lambdas have to + /// be loaded right after the function they belong to. It is required to have + /// canonical declaration for the lambda class from the same module as + /// enclosing function. FUNCTION_DECL_TO_LAMBDAS_MAP = 71, }; diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 757cac51b5171..c1843218a4b8b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -533,6 +533,14 @@ class ASTReader DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap; /// Mapping from FunctionDecl IDs to the corresponding lambda IDs. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// It is required to have canonical declaration for lambda class from the + /// same module as enclosing function. This is required to correctly resolve + /// captured variables in the lambda. Without this, due to lazy + /// deserialization, canonical declarations for the function and lambdas can + /// be selected from different modules and DeclRefExprs may refer to the AST + /// nodes that don't exist in the function. llvm::DenseMap> FunctionToLambdasMap; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 9dc01e75892f9..760866fd9de93 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -233,7 +233,11 @@ class ASTWriter : public ASTDeserializationListener, /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`. llvm::SmallPtrSet PredefinedDecls; - /// Map that provides the ID of function to the vector of lambdas inside it. + /// Mapping from FunctionDecl to the list of lambda IDs inside the function. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// In order to have canonical declaration for lambda class from the same + /// module as enclosing function during deserialization. llvm::DenseMap> FunctionToLambdasMap; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index e68eef99591ca..d1e11f4ddffd4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3860,7 +3860,9 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) { GlobalDeclID ID = ReadDeclID(F, Record, I); auto &Lambdas = FunctionToLambdasMap[ID]; - for (unsigned II = 0, NN = Record[I++]; II < NN; II++) + unsigned NN = Record[I++]; + Lambdas.reserve(NN); + for (unsigned II = 0; II < NN; II++) Lambdas.push_back(ReadDeclID(F, Record, I)); } break; diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 47e389201451d..7cead2728ca93 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -4358,6 +4358,7 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { IT != FunctionToLambdasMap.end()) { for (auto LID : IT->second) GetDecl(LID); + FunctionToLambdasMap.erase(IT); } } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c2952df3a0370..c3514d6e6805f 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5708,6 +5708,8 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { DelayedNamespaceRecord); if (!FunctionToLambdasMap.empty()) { + // TODO: on disk hash table for function to lambda mapping might be more + // efficent becuase it allows lazy deserialization. RecordData FunctionToLambdasMapRecord; for (const auto &Pair : FunctionToLambdasMap) { FunctionToLambdasMapRecord.push_back(