Skip to content

Commit d50eaac

Browse files
Revert "[clang][CodeGen] Zero init unspecified fields in initializers in C" (#109898)
Reverts #97121 Causing failures on LNT bots; log shows a crash in ConstStructBuilder::BuildStruct.
1 parent 18b9d49 commit d50eaac

23 files changed

+96
-658
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5860,26 +5860,3 @@ specify the starting offset to begin embedding from. The resources is treated
58605860
as being empty if the specified offset is larger than the number of bytes in
58615861
the resource. The offset will be applied *before* any ``limit`` parameters are
58625862
applied.
5863-
5864-
Union and aggregate initialization in C
5865-
=======================================
5866-
5867-
In C23 (N2900), when an object is initialized from initializer ``= {}``, all
5868-
elements of arrays, all members of structs, and the first members of unions are
5869-
empty-initialized recursively. In addition, all padding bits are initialized to
5870-
zero.
5871-
5872-
Clang guarantees the following behaviors:
5873-
5874-
* ``1:`` Clang supports initializer ``= {}`` mentioned above in all C
5875-
standards.
5876-
5877-
* ``2:`` When unions are initialized from initializer ``= {}``, bytes outside
5878-
of the first members of unions are also initialized to zero.
5879-
5880-
* ``3:`` When unions, structures and arrays are initialized from initializer
5881-
``= { initializer-list }``, all members not explicitly initialized in
5882-
the initializer list are empty-initialized recursively. In addition, all
5883-
padding bits are initialized to zero.
5884-
5885-
Currently, the above extension only applies to C source code, not C++.

clang/lib/CodeGen/CGExprAgg.cpp

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1698,17 +1698,6 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
16981698
// Prepare a 'this' for CXXDefaultInitExprs.
16991699
CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress());
17001700

1701-
const bool ZeroInitPadding =
1702-
CGF.CGM.shouldZeroInitPadding() && !Dest.isZeroed();
1703-
const Address BaseLoc = Dest.getAddress().withElementType(CGF.Int8Ty);
1704-
auto DoZeroInitPadding = [&](CharUnits Offset, CharUnits Size) {
1705-
if (Size.isPositive()) {
1706-
Address Loc = CGF.Builder.CreateConstGEP(BaseLoc, Offset.getQuantity());
1707-
llvm::Constant *SizeVal = CGF.Builder.getInt64(Size.getQuantity());
1708-
CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false);
1709-
}
1710-
};
1711-
17121701
if (record->isUnion()) {
17131702
// Only initialize one field of a union. The field itself is
17141703
// specified by the initializer list.
@@ -1733,37 +1722,17 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17331722
if (NumInitElements) {
17341723
// Store the initializer into the field
17351724
EmitInitializationToLValue(InitExprs[0], FieldLoc);
1736-
if (ZeroInitPadding) {
1737-
CharUnits TotalSize =
1738-
Dest.getPreferredSize(CGF.getContext(), DestLV.getType());
1739-
CharUnits FieldSize =
1740-
CGF.getContext().getTypeSizeInChars(FieldLoc.getType());
1741-
DoZeroInitPadding(FieldSize, TotalSize - FieldSize);
1742-
}
17431725
} else {
17441726
// Default-initialize to null.
1745-
if (ZeroInitPadding)
1746-
EmitNullInitializationToLValue(DestLV);
1747-
else
1748-
EmitNullInitializationToLValue(FieldLoc);
1727+
EmitNullInitializationToLValue(FieldLoc);
17491728
}
1729+
17501730
return;
17511731
}
17521732

17531733
// Here we iterate over the fields; this makes it simpler to both
17541734
// default-initialize fields and skip over unnamed fields.
1755-
const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(record);
1756-
CharUnits SizeSoFar = CharUnits::Zero();
17571735
for (const auto *field : record->fields()) {
1758-
if (ZeroInitPadding) {
1759-
unsigned FieldNo = field->getFieldIndex();
1760-
CharUnits Offset =
1761-
CGF.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo));
1762-
DoZeroInitPadding(SizeSoFar, Offset - SizeSoFar);
1763-
CharUnits FieldSize =
1764-
CGF.getContext().getTypeSizeInChars(field->getType());
1765-
SizeSoFar = Offset + FieldSize;
1766-
}
17671736
// We're done once we hit the flexible array member.
17681737
if (field->getType()->isIncompleteArrayType())
17691738
break;
@@ -1805,11 +1774,6 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
18051774
}
18061775
}
18071776
}
1808-
if (ZeroInitPadding) {
1809-
CharUnits TotalSize =
1810-
Dest.getPreferredSize(CGF.getContext(), DestLV.getType());
1811-
DoZeroInitPadding(SizeSoFar, TotalSize - SizeSoFar);
1812-
}
18131777
}
18141778

18151779
void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,

clang/lib/CodeGen/CGExprConstant.cpp

Lines changed: 11 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,6 @@ using namespace CodeGen;
4242
namespace {
4343
class ConstExprEmitter;
4444

45-
llvm::Constant *getPadding(const CodeGenModule &CGM, CharUnits PadSize) {
46-
llvm::Type *Ty = CGM.CharTy;
47-
if (PadSize > CharUnits::One())
48-
Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity());
49-
if (CGM.shouldZeroInitPadding()) {
50-
return llvm::Constant::getNullValue(Ty);
51-
}
52-
return llvm::UndefValue::get(Ty);
53-
}
54-
5545
struct ConstantAggregateBuilderUtils {
5646
CodeGenModule &CGM;
5747

@@ -71,7 +61,10 @@ struct ConstantAggregateBuilderUtils {
7161
}
7262

7363
llvm::Constant *getPadding(CharUnits PadSize) const {
74-
return ::getPadding(CGM, PadSize);
64+
llvm::Type *Ty = CGM.CharTy;
65+
if (PadSize > CharUnits::One())
66+
Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity());
67+
return llvm::UndefValue::get(Ty);
7568
}
7669

7770
llvm::Constant *getZeroes(CharUnits ZeroSize) const {
@@ -598,11 +591,6 @@ class ConstStructBuilder {
598591
bool Build(const InitListExpr *ILE, bool AllowOverwrite);
599592
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
600593
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
601-
bool DoZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo,
602-
const FieldDecl &Field, bool AllowOverwrite,
603-
CharUnits &FieldSize, CharUnits &SizeSoFar);
604-
bool DoZeroInitPadding(const ASTRecordLayout &Layout, bool AllowOverwrite,
605-
CharUnits &SizeSoFar);
606594
llvm::Constant *Finalize(QualType Ty);
607595
};
608596

@@ -727,10 +715,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
727715
if (CXXRD->getNumBases())
728716
return false;
729717

730-
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
731-
CharUnits FieldSize = CharUnits::Zero();
732-
CharUnits SizeSoFar = CharUnits::Zero();
733-
734718
for (FieldDecl *Field : RD->fields()) {
735719
++FieldNo;
736720

@@ -748,13 +732,8 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
748732
const Expr *Init = nullptr;
749733
if (ElementNo < ILE->getNumInits())
750734
Init = ILE->getInit(ElementNo++);
751-
if (isa_and_nonnull<NoInitExpr>(Init)) {
752-
if (ZeroInitPadding &&
753-
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize,
754-
SizeSoFar))
755-
return false;
735+
if (isa_and_nonnull<NoInitExpr>(Init))
756736
continue;
757-
}
758737

759738
// Zero-sized fields are not emitted, but their initializers may still
760739
// prevent emission of this struct as a constant.
@@ -764,11 +743,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
764743
continue;
765744
}
766745

767-
if (ZeroInitPadding &&
768-
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize,
769-
SizeSoFar))
770-
return false;
771-
772746
// When emitting a DesignatedInitUpdateExpr, a nested InitListExpr
773747
// represents additional overwriting of our current constant value, and not
774748
// a new constant to emit independently.
@@ -794,10 +768,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
794768
if (!EltInit)
795769
return false;
796770

797-
if (ZeroInitPadding && FieldSize.isZero())
798-
SizeSoFar += CharUnits::fromQuantity(
799-
CGM.getDataLayout().getTypeAllocSize(EltInit->getType()));
800-
801771
if (!Field->isBitField()) {
802772
// Handle non-bitfield members.
803773
if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit,
@@ -815,9 +785,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
815785
}
816786
}
817787

818-
if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar))
819-
return false;
820-
821788
return true;
822789
}
823790

@@ -882,9 +849,6 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
882849

883850
unsigned FieldNo = 0;
884851
uint64_t OffsetBits = CGM.getContext().toBits(Offset);
885-
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
886-
CharUnits FieldSize = CharUnits::Zero();
887-
CharUnits SizeSoFar = CharUnits::Zero();
888852

889853
bool AllowOverwrite = false;
890854
for (RecordDecl::field_iterator Field = RD->field_begin(),
@@ -906,15 +870,6 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
906870
if (!EltInit)
907871
return false;
908872

909-
if (ZeroInitPadding) {
910-
if (!DoZeroInitPadding(Layout, FieldNo, **Field, AllowOverwrite,
911-
FieldSize, SizeSoFar))
912-
return false;
913-
if (FieldSize.isZero())
914-
SizeSoFar += CharUnits::fromQuantity(
915-
CGM.getDataLayout().getTypeAllocSize(EltInit->getType()));
916-
}
917-
918873
if (!Field->isBitField()) {
919874
// Handle non-bitfield members.
920875
if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits,
@@ -931,35 +886,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
931886
return false;
932887
}
933888
}
934-
if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar))
935-
return false;
936-
937-
return true;
938-
}
939889

940-
bool ConstStructBuilder::DoZeroInitPadding(
941-
const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field,
942-
bool AllowOverwrite, CharUnits &FieldSize, CharUnits &SizeSoFar) {
943-
CharUnits Offset =
944-
CGM.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo));
945-
if (SizeSoFar < Offset)
946-
if (!AppendBytes(SizeSoFar, getPadding(CGM, Offset - SizeSoFar),
947-
AllowOverwrite))
948-
return false;
949-
FieldSize = CGM.getContext().getTypeSizeInChars(Field.getType());
950-
SizeSoFar = Offset + FieldSize;
951-
return true;
952-
}
953-
954-
bool ConstStructBuilder::DoZeroInitPadding(const ASTRecordLayout &Layout,
955-
bool AllowOverwrite,
956-
CharUnits &SizeSoFar) {
957-
CharUnits TotalSize = Layout.getSize();
958-
if (SizeSoFar < TotalSize)
959-
if (!AppendBytes(SizeSoFar, getPadding(CGM, TotalSize - SizeSoFar),
960-
AllowOverwrite))
961-
return false;
962-
SizeSoFar = TotalSize;
963890
return true;
964891
}
965892

@@ -1200,10 +1127,12 @@ class ConstExprEmitter
12001127

12011128
assert(CurSize <= TotalSize && "Union size mismatch!");
12021129
if (unsigned NumPadBytes = TotalSize - CurSize) {
1203-
llvm::Constant *Padding =
1204-
getPadding(CGM, CharUnits::fromQuantity(NumPadBytes));
1205-
Elts.push_back(Padding);
1206-
Types.push_back(Padding->getType());
1130+
llvm::Type *Ty = CGM.CharTy;
1131+
if (NumPadBytes > 1)
1132+
Ty = llvm::ArrayType::get(Ty, NumPadBytes);
1133+
1134+
Elts.push_back(llvm::UndefValue::get(Ty));
1135+
Types.push_back(Ty);
12071136
}
12081137

12091138
llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false);

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,57 +1676,6 @@ class CodeGenModule : public CodeGenTypeCache {
16761676
MustTailCallUndefinedGlobals.insert(Global);
16771677
}
16781678

1679-
bool shouldZeroInitPadding() const {
1680-
// In C23 (N3096) $6.7.10:
1681-
// """
1682-
// If any object is initialized with an empty iniitializer, then it is
1683-
// subject to default initialization:
1684-
// - if it is an aggregate, every member is initialized (recursively)
1685-
// according to these rules, and any padding is initialized to zero bits;
1686-
// - if it is a union, the first named member is initialized (recursively)
1687-
// according to these rules, and any padding is initialized to zero bits.
1688-
//
1689-
// If the aggregate or union contains elements or members that are
1690-
// aggregates or unions, these rules apply recursively to the subaggregates
1691-
// or contained unions.
1692-
//
1693-
// If there are fewer initializers in a brace-enclosed list than there are
1694-
// elements or members of an aggregate, or fewer characters in a string
1695-
// literal used to initialize an array of known size than there are elements
1696-
// in the array, the remainder of the aggregate is subject to default
1697-
// initialization.
1698-
// """
1699-
//
1700-
// From my understanding, the standard is ambiguous in the following two
1701-
// areas:
1702-
// 1. For a union type with empty initializer, if the first named member is
1703-
// not the largest member, then the bytes comes after the first named member
1704-
// but before padding are left unspecified. An example is:
1705-
// union U { int a; long long b;};
1706-
// union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left
1707-
// unspecified.
1708-
//
1709-
// 2. It only mentions padding for empty initializer, but doesn't mention
1710-
// padding for a non empty initialization list. And if the aggregation or
1711-
// union contains elements or members that are aggregates or unions, and
1712-
// some are non empty initializers, while others are empty initiailizers,
1713-
// the padding initialization is unclear. An example is:
1714-
// struct S1 { int a; long long b; };
1715-
// struct S2 { char c; struct S1 s1; };
1716-
// // The values for paddings between s2.c and s2.s1.a, between s2.s1.a
1717-
// and s2.s1.b are unclear.
1718-
// struct S2 s2 = { 'c' };
1719-
//
1720-
// Here we choose to zero initiailize left bytes of a union type. Because
1721-
// projects like the Linux kernel are relying on this behavior. If we don't
1722-
// explicitly zero initialize them, the undef values can be optimized to
1723-
// return gabage data. We also choose to zero initialize paddings for
1724-
// aggregates and unions, no matter they are initialized by empty
1725-
// initializers or non empty initializers. This can provide a consistent
1726-
// behavior. So projects like the Linux kernel can rely on it.
1727-
return !getLangOpts().CPlusPlus;
1728-
}
1729-
17301679
private:
17311680
bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const;
17321681

clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ struct et7 {
88
52,
99
};
1010

11-
// CHECK: @yv7 ={{.*}} global { [0 x float], i8, [3 x i8] } { [0 x float] zeroinitializer, i8 52, [3 x i8] zeroinitializer }
11+
// CHECK: @yv7 ={{.*}} global %struct.et7 { [0 x float] zeroinitializer, i8 52 }

clang/test/CodeGen/2008-08-07-AlignPadding1.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ struct gc_generation {
2020

2121
#define GEN_HEAD(n) (&generations[n].head)
2222

23-
// The idea is that there are 6 zeroinitializers in this structure initializer to cover
23+
// The idea is that there are 6 undefs in this structure initializer to cover
2424
// the padding between elements.
25-
// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] zeroinitializer }, i32 700, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }]
25+
// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] undef }, i32 700, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }]
2626
/* linked lists of container objects */
2727
struct gc_generation generations[3] = {
2828
/* PyGC_Head, threshold, count */

clang/test/CodeGen/2009-06-14-anonymous-union-init.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ struct sysfs_dirent {
77
};
88
struct sysfs_dirent sysfs_root = { {}, 16877 };
99

10-
// CHECK: @sysfs_root = {{.*}}global { %union.anon, i16, [2 x i8] } { %union.anon zeroinitializer, i16 16877, [2 x i8] zeroinitializer }
10+
// CHECK: @sysfs_root = {{.*}}global %struct.sysfs_dirent { %union.anon zeroinitializer, i16 16877 }
1111

1212
struct Foo {
1313
union { struct empty {} x; };
@@ -16,4 +16,4 @@ struct Foo {
1616
struct Foo foo = { {}, 16877 };
1717

1818
// EMPTY: @foo = {{.*}}global %struct.Foo { i16 16877 }
19-
// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] zeroinitializer, i16 16877 }
19+
// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] undef, i16 16877 }

0 commit comments

Comments
 (0)