Skip to content

Commit 180f803

Browse files
[clang] Fix ASTWriter crash after merging named enums (#114240)
Clang already removes parsed enumerators when merging typedefs to anonymous enums. This is why the following example decl used to be handled correctly while merging, and ASTWriter behaves as expected: ```c typedef enum { Val } AnonEnum; ``` However, the mentioned mechanism didn't handle named enums. This leads to stale declarations in `IdResolver`, causing an assertion violation in ASTWriter ``Assertion `DeclIDs.contains(D) && "Declaration not emitted!"' failed`` when a module is being serialized with the following example merged enums: ```c typedef enum Enum1 { Val_A } Enum1; enum Enum2 { Val_B }; ``` The PR applies the same mechanism in the named enums case. Additionally, I dropped the call to `getLexicalDeclContext()->removeDecl` as it was causing a wrong odr-violation diagnostic with anonymous enums. Might be easier to to review commit by commit. Any feedback is appreciated. ### Context This fixes frontend crashes that were encountered when certain Objective-C modules are included on Xcode 16. For example, by running `CC=/path/to/clang-19 xcodebuild clean build` on a project that contains the following Objective-C file: ```c #include <os/atomic.h> int main() { return memory_order_relaxed; } ``` This crashes the parser in release, when ASTReader tries to load the enumerator declaration.
1 parent 21610e3 commit 180f803

File tree

6 files changed

+139
-16
lines changed

6 files changed

+139
-16
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ Bug Fixes in This Version
248248

249249
- Clang now outputs correct values when #embed data contains bytes with negative
250250
signed char values (#GH102798).
251+
- Fixed a crash when merging named enumerations in modules (#GH114240).
251252
- Fixed rejects-valid problem when #embed appears in std::initializer_list or
252253
when it can affect template argument deduction (#GH122306).
253254
- Fix crash on code completion of function calls involving partial order of function templates

clang/include/clang/Sema/Sema.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4008,7 +4008,7 @@ class Sema final : public SemaBase {
40084008
/// Perform ODR-like check for C/ObjC when merging tag types from modules.
40094009
/// Differently from C++, actually parse the body and reject / error out
40104010
/// in case of a structural mismatch.
4011-
bool ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody);
4011+
bool ActOnDuplicateDefinition(Scope *S, Decl *Prev, SkipBodyInfo &SkipBody);
40124012

40134013
typedef void *SkippedDefinitionContext;
40144014

@@ -4132,6 +4132,12 @@ class Sema final : public SemaBase {
41324132
void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
41334133
LookupResult &OldDecls);
41344134

4135+
/// CleanupMergedEnum - We have just merged the decl 'New' by making another
4136+
/// definition visible.
4137+
/// This method performs any necessary cleanup on the parser state to discard
4138+
/// child nodes from newly parsed decl we are retiring.
4139+
void CleanupMergedEnum(Scope *S, Decl *New);
4140+
41354141
/// MergeFunctionDecl - We just parsed a function 'New' from
41364142
/// declarator D which has the same name and scope as a previous
41374143
/// declaration 'Old'. Figure out how to resolve this situation,

clang/lib/Parse/ParseDecl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5652,7 +5652,7 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS,
56525652
Decl *D = SkipBody.CheckSameAsPrevious ? SkipBody.New : TagDecl;
56535653
ParseEnumBody(StartLoc, D);
56545654
if (SkipBody.CheckSameAsPrevious &&
5655-
!Actions.ActOnDuplicateDefinition(TagDecl, SkipBody)) {
5655+
!Actions.ActOnDuplicateDefinition(getCurScope(), TagDecl, SkipBody)) {
56565656
DS.SetTypeSpecError();
56575657
return;
56585658
}

clang/lib/Parse/ParseDeclCXX.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2350,7 +2350,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
23502350
// Parse the definition body.
23512351
ParseStructUnionBody(StartLoc, TagType, cast<RecordDecl>(D));
23522352
if (SkipBody.CheckSameAsPrevious &&
2353-
!Actions.ActOnDuplicateDefinition(TagOrTempResult.get(), SkipBody)) {
2353+
!Actions.ActOnDuplicateDefinition(getCurScope(),
2354+
TagOrTempResult.get(), SkipBody)) {
23542355
DS.SetTypeSpecError();
23552356
return;
23562357
}

clang/lib/Sema/SemaDecl.cpp

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2564,18 +2564,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
25642564
// Make the old tag definition visible.
25652565
makeMergedDefinitionVisible(Hidden);
25662566

2567-
// If this was an unscoped enumeration, yank all of its enumerators
2568-
// out of the scope.
2569-
if (isa<EnumDecl>(NewTag)) {
2570-
Scope *EnumScope = getNonFieldDeclScope(S);
2571-
for (auto *D : NewTag->decls()) {
2572-
auto *ED = cast<EnumConstantDecl>(D);
2573-
assert(EnumScope->isDeclScope(ED));
2574-
EnumScope->RemoveDecl(ED);
2575-
IdResolver.RemoveDecl(ED);
2576-
ED->getLexicalDeclContext()->removeDecl(ED);
2577-
}
2578-
}
2567+
CleanupMergedEnum(S, NewTag);
25792568
}
25802569
}
25812570

@@ -2652,6 +2641,19 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
26522641
notePreviousDefinition(Old, New->getLocation());
26532642
}
26542643

2644+
void Sema::CleanupMergedEnum(Scope *S, Decl *New) {
2645+
// If this was an unscoped enumeration, yank all of its enumerators
2646+
// out of the scope.
2647+
if (auto *ED = dyn_cast<EnumDecl>(New); ED && !ED->isScoped()) {
2648+
Scope *EnumScope = getNonFieldDeclScope(S);
2649+
for (auto *ECD : ED->enumerators()) {
2650+
assert(EnumScope->isDeclScope(ECD));
2651+
EnumScope->RemoveDecl(ECD);
2652+
IdResolver.RemoveDecl(ECD);
2653+
}
2654+
}
2655+
}
2656+
26552657
/// DeclhasAttr - returns true if decl Declaration already has the target
26562658
/// attribute.
26572659
static bool DeclHasAttr(const Decl *D, const Attr *A) {
@@ -18347,12 +18349,14 @@ void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) {
1834718349
AddPushedVisibilityAttribute(Tag);
1834818350
}
1834918351

18350-
bool Sema::ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody) {
18352+
bool Sema::ActOnDuplicateDefinition(Scope *S, Decl *Prev,
18353+
SkipBodyInfo &SkipBody) {
1835118354
if (!hasStructuralCompatLayout(Prev, SkipBody.New))
1835218355
return false;
1835318356

1835418357
// Make the previous decl visible.
1835518358
makeMergedDefinitionVisible(SkipBody.Previous);
18359+
CleanupMergedEnum(S, SkipBody.New);
1835618360
return true;
1835718361
}
1835818362

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// RUN: rm -rf %t
2+
// RUN: split-file %s %t
3+
4+
5+
// Expect no crash
6+
// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modcache -fmodule-map-file=%t/module.modulemap %t/source.m
7+
8+
// Add -ast-dump-all to check that the AST nodes are merged correctly.
9+
// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modcache -fmodule-map-file=%t/module.modulemap %t/source.m -ast-dump-all 2>&1 | FileCheck %s
10+
11+
12+
//--- shared.h
13+
// This header is shared between two modules, but it's not a module itself.
14+
// The enums defined here are parsed in both modules, and merged while building ModB.
15+
16+
typedef enum MyEnum1 { MyVal_A } MyEnum1;
17+
// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum1
18+
// CHECK-NEXT: | |-also in ModB
19+
// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_A 'int'
20+
// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum1 'enum MyEnum1'
21+
// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported
22+
// CHECK-NEXT: | `-EnumType 0x{{.*}} 'enum MyEnum1' imported
23+
// CHECK-NEXT: | `-Enum 0x{{.*}} 'MyEnum1'
24+
25+
26+
enum MyEnum2 { MyVal_B };
27+
// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum2
28+
// CHECK-NEXT: | |-also in ModB
29+
// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_B 'int'
30+
31+
32+
typedef enum { MyVal_C } MyEnum3;
33+
// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations>
34+
// CHECK-NEXT: | |-also in ModB
35+
// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_C 'int'
36+
// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum3 'enum MyEnum3':'MyEnum3'
37+
// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported
38+
// CHECK-NEXT: | `-EnumType 0x{{.*}} 'MyEnum3' imported
39+
// CHECK-NEXT: | `-Enum 0x{{.*}}
40+
41+
struct MyStruct {
42+
enum MyEnum5 { MyVal_D } Field;
43+
};
44+
45+
// CHECK: |-RecordDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> struct MyStruct definition
46+
// CHECK-NEXT: | |-also in ModB
47+
// CHECK-NEXT: | |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum5
48+
// CHECK-NEXT: | | |-also in ModB
49+
// CHECK-NEXT: | | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_D 'int'
50+
// CHECK-NEXT: | `-FieldDecl 0x{{.*}} imported in ModA.ModAFile1 hidden Field 'enum MyEnum5'
51+
52+
// In this case, no merging happens on the EnumDecl in Objective-C, and ASTWriter writes both EnumConstantDecls when building ModB.
53+
enum { MyVal_E };
54+
// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 hidden <undeserialized declarations>
55+
// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyVal_E 'int'
56+
57+
58+
// Redeclarations coming from ModB.
59+
// CHECK: |-TypedefDecl 0x{{.*}} prev 0x{{.*}} imported in ModB MyEnum1 'enum MyEnum1'
60+
// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported
61+
// CHECK-NEXT: | `-EnumType 0x{{.*}} 'enum MyEnum1' imported
62+
// CHECK-NEXT: | `-Enum 0x{{.*}} 'MyEnum1'
63+
64+
// CHECK: |-EnumDecl 0x{{.*}} prev 0x{{.*}} imported in ModB <undeserialized declarations>
65+
// CHECK-NEXT: | |-also in ModB
66+
// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModB MyVal_C 'int'
67+
// CHECK-NEXT: |-TypedefDecl 0x{{.*}} prev 0x{{.*}} imported in ModB MyEnum3 'enum MyEnum3':'MyEnum3'
68+
// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported
69+
// CHECK-NEXT: | `-EnumType 0x{{.*}} 'MyEnum3' imported
70+
// CHECK-NEXT: | `-Enum 0x{{.*}}
71+
72+
// CHECK: |-EnumDecl 0x{{.*}} imported in ModB <undeserialized declarations>
73+
// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} first 0x{{.*}} imported in ModB referenced MyVal_E 'int'
74+
75+
76+
77+
//--- module.modulemap
78+
module ModA {
79+
module ModAFile1 {
80+
header "ModAFile1.h"
81+
export *
82+
}
83+
module ModAFile2 {
84+
header "ModAFile2.h"
85+
export *
86+
}
87+
}
88+
// The goal of writing ModB is to test that ASTWriter can handle the merged AST nodes correctly.
89+
// For example, a stale declaration in IdResolver can cause an assertion failure while writing the identifier table.
90+
module ModB {
91+
header "ModBFile.h"
92+
export *
93+
}
94+
95+
//--- ModAFile1.h
96+
#include "shared.h"
97+
98+
//--- ModAFile2.h
99+
// Including this file, triggers loading of the module ModA with nodes coming ModAFile1.h being hidden.
100+
101+
//--- ModBFile.h
102+
// ModBFile depends on ModAFile2.h only.
103+
#include "ModAFile2.h"
104+
// Including shared.h here causes Sema to merge the AST nodes from shared.h with the hidden ones from ModA.
105+
#include "shared.h"
106+
107+
108+
//--- source.m
109+
#include "ModBFile.h"
110+
111+
int main() { return MyVal_A + MyVal_B + MyVal_C + MyVal_D + MyVal_E; }

0 commit comments

Comments
 (0)