Skip to content

Commit 4082a75

Browse files
authored
Improve stack usage to increase recursive initialization depth (#88546)
We were crashing due to stack exhaustion on rather reasonable C++ template code. After some investigation, I found that we have a stack-allocated object that was huge: `InitializationSequence` was 7016 bytes. This caused an overflow with deep call stacks in initialization code. With these changes, `InitializationSequence` is now 248 bytes. With the original code, testing RelWithDebInfo on Windows 10, all the tests in SemaCXX took about 6s 800ms. The max template depth I could reach on my machine using the code in the issue was 708. After that, I would get `-Wstack-exhausted` warnings until crashing at 976 instantiations. With these changes on the same machine, all the tests in SemaCXX took about 6s 500ms. The max template depth I could reach was 1492. After that, I would get `-Wstack-exhausted` warnings until crashing at 2898 instantiations. This improves the behavior of #88330 but there's still an outstanding question of why we run out of stack space and crash in some circumstances before we're able to issue a diagnostic about stack space exhaustion.
1 parent 184ba03 commit 4082a75

File tree

5 files changed

+54
-75
lines changed

5 files changed

+54
-75
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,12 @@ Non-comprehensive list of changes in this release
203203
- ``__typeof_unqual__`` is available in all C modes as an extension, which behaves
204204
like ``typeof_unqual`` from C23, similar to ``__typeof__`` and ``typeof``.
205205

206+
- Improved stack usage with C++ initialization code. This allows significantly
207+
more levels of recursive initialization before reaching stack exhaustion
208+
limits. This will positively impact recursive template instantiation code,
209+
but should also reduce memory overhead for initializations in general.
210+
Fixes #GH88330
211+
206212
New Compiler Flags
207213
------------------
208214
- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and

clang/include/clang/Sema/Initialization.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,7 +1134,7 @@ class InitializationSequence {
11341134
OverloadingResult FailedOverloadResult;
11351135

11361136
/// The candidate set created when initialization failed.
1137-
OverloadCandidateSet FailedCandidateSet;
1137+
std::unique_ptr<OverloadCandidateSet> FailedCandidateSet;
11381138

11391139
/// The incomplete type that caused a failure.
11401140
QualType FailedIncompleteType;
@@ -1403,7 +1403,9 @@ class InitializationSequence {
14031403
/// Retrieve a reference to the candidate set when overload
14041404
/// resolution fails.
14051405
OverloadCandidateSet &getFailedCandidateSet() {
1406-
return FailedCandidateSet;
1406+
assert(FailedCandidateSet &&
1407+
"this should have been allocated in the constructor!");
1408+
return *FailedCandidateSet;
14071409
}
14081410

14091411
/// Get the overloading result, for when the initialization

clang/include/clang/Sema/Overload.h

Lines changed: 19 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <cassert>
3838
#include <cstddef>
3939
#include <cstdint>
40+
#include <memory>
4041
#include <utility>
4142

4243
namespace clang {
@@ -874,7 +875,8 @@ class Sema;
874875
ConversionFixItGenerator Fix;
875876

876877
/// Viable - True to indicate that this overload candidate is viable.
877-
bool Viable : 1;
878+
LLVM_PREFERRED_TYPE(bool)
879+
unsigned Viable : 1;
878880

879881
/// Whether this candidate is the best viable function, or tied for being
880882
/// the best viable function.
@@ -883,12 +885,14 @@ class Sema;
883885
/// was part of the ambiguity kernel: the minimal non-empty set of viable
884886
/// candidates such that all elements of the ambiguity kernel are better
885887
/// than all viable candidates not in the ambiguity kernel.
886-
bool Best : 1;
888+
LLVM_PREFERRED_TYPE(bool)
889+
unsigned Best : 1;
887890

888891
/// IsSurrogate - True to indicate that this candidate is a
889892
/// surrogate for a conversion to a function pointer or reference
890893
/// (C++ [over.call.object]).
891-
bool IsSurrogate : 1;
894+
LLVM_PREFERRED_TYPE(bool)
895+
unsigned IsSurrogate : 1;
892896

893897
/// IgnoreObjectArgument - True to indicate that the first
894898
/// argument's conversion, which for this function represents the
@@ -897,18 +901,20 @@ class Sema;
897901
/// implicit object argument is just a placeholder) or a
898902
/// non-static member function when the call doesn't have an
899903
/// object argument.
900-
bool IgnoreObjectArgument : 1;
904+
LLVM_PREFERRED_TYPE(bool)
905+
unsigned IgnoreObjectArgument : 1;
901906

902907
/// True if the candidate was found using ADL.
903-
CallExpr::ADLCallKind IsADLCandidate : 1;
908+
LLVM_PREFERRED_TYPE(CallExpr::ADLCallKind)
909+
unsigned IsADLCandidate : 1;
904910

905911
/// Whether this is a rewritten candidate, and if so, of what kind?
906912
LLVM_PREFERRED_TYPE(OverloadCandidateRewriteKind)
907913
unsigned RewriteKind : 2;
908914

909915
/// FailureKind - The reason why this candidate is not viable.
910-
/// Actually an OverloadFailureKind.
911-
unsigned char FailureKind;
916+
LLVM_PREFERRED_TYPE(OverloadFailureKind)
917+
unsigned FailureKind : 5;
912918

913919
/// The number of call arguments that were explicitly provided,
914920
/// to be used while performing partial ordering of function templates.
@@ -972,7 +978,9 @@ class Sema;
972978
private:
973979
friend class OverloadCandidateSet;
974980
OverloadCandidate()
975-
: IsSurrogate(false), IsADLCandidate(CallExpr::NotADL), RewriteKind(CRK_None) {}
981+
: IsSurrogate(false),
982+
IsADLCandidate(static_cast<unsigned>(CallExpr::NotADL)),
983+
RewriteKind(CRK_None) {}
976984
};
977985

978986
/// OverloadCandidateSet - A set of overload candidates, used in C++
@@ -1070,51 +1078,16 @@ class Sema;
10701078
};
10711079

10721080
private:
1073-
SmallVector<OverloadCandidate, 16> Candidates;
1074-
llvm::SmallPtrSet<uintptr_t, 16> Functions;
1075-
1076-
// Allocator for ConversionSequenceLists. We store the first few of these
1077-
// inline to avoid allocation for small sets.
1078-
llvm::BumpPtrAllocator SlabAllocator;
1081+
SmallVector<OverloadCandidate, 4> Candidates;
1082+
llvm::SmallPtrSet<uintptr_t, 4> Functions;
10791083

10801084
SourceLocation Loc;
10811085
CandidateSetKind Kind;
10821086
OperatorRewriteInfo RewriteInfo;
10831087

1084-
constexpr static unsigned NumInlineBytes =
1085-
24 * sizeof(ImplicitConversionSequence);
1086-
unsigned NumInlineBytesUsed = 0;
1087-
alignas(void *) char InlineSpace[NumInlineBytes];
1088-
10891088
// Address space of the object being constructed.
10901089
LangAS DestAS = LangAS::Default;
10911090

1092-
/// If we have space, allocates from inline storage. Otherwise, allocates
1093-
/// from the slab allocator.
1094-
/// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
1095-
/// instead.
1096-
/// FIXME: Now that this only allocates ImplicitConversionSequences, do we
1097-
/// want to un-generalize this?
1098-
template <typename T>
1099-
T *slabAllocate(unsigned N) {
1100-
// It's simpler if this doesn't need to consider alignment.
1101-
static_assert(alignof(T) == alignof(void *),
1102-
"Only works for pointer-aligned types.");
1103-
static_assert(std::is_trivial<T>::value ||
1104-
std::is_same<ImplicitConversionSequence, T>::value,
1105-
"Add destruction logic to OverloadCandidateSet::clear().");
1106-
1107-
unsigned NBytes = sizeof(T) * N;
1108-
if (NBytes > NumInlineBytes - NumInlineBytesUsed)
1109-
return SlabAllocator.Allocate<T>(N);
1110-
char *FreeSpaceStart = InlineSpace + NumInlineBytesUsed;
1111-
assert(uintptr_t(FreeSpaceStart) % alignof(void *) == 0 &&
1112-
"Misaligned storage!");
1113-
1114-
NumInlineBytesUsed += NBytes;
1115-
return reinterpret_cast<T *>(FreeSpaceStart);
1116-
}
1117-
11181091
void destroyCandidates();
11191092

11201093
public:
@@ -1163,12 +1136,7 @@ class Sema;
11631136
ConversionSequenceList
11641137
allocateConversionSequences(unsigned NumConversions) {
11651138
ImplicitConversionSequence *Conversions =
1166-
slabAllocate<ImplicitConversionSequence>(NumConversions);
1167-
1168-
// Construct the new objects.
1169-
for (unsigned I = 0; I != NumConversions; ++I)
1170-
new (&Conversions[I]) ImplicitConversionSequence();
1171-
1139+
new ImplicitConversionSequence[NumConversions];
11721140
return ConversionSequenceList(Conversions, NumConversions);
11731141
}
11741142

clang/lib/Sema/SemaInit.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6114,7 +6114,8 @@ InitializationSequence::InitializationSequence(
61146114
Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind,
61156115
MultiExprArg Args, bool TopLevelOfInitList, bool TreatUnavailableAsInvalid)
61166116
: FailedOverloadResult(OR_Success),
6117-
FailedCandidateSet(Kind.getLocation(), OverloadCandidateSet::CSK_Normal) {
6117+
FailedCandidateSet(new OverloadCandidateSet(
6118+
Kind.getLocation(), OverloadCandidateSet::CSK_Normal)) {
61186119
InitializeFrom(S, Entity, Kind, Args, TopLevelOfInitList,
61196120
TreatUnavailableAsInvalid);
61206121
}
@@ -9735,7 +9736,7 @@ bool InitializationSequence::Diagnose(Sema &S,
97359736
switch (FailedOverloadResult) {
97369737
case OR_Ambiguous:
97379738

9738-
FailedCandidateSet.NoteCandidates(
9739+
FailedCandidateSet->NoteCandidates(
97399740
PartialDiagnosticAt(
97409741
Kind.getLocation(),
97419742
Failure == FK_UserConversionOverloadFailed
@@ -9749,7 +9750,8 @@ bool InitializationSequence::Diagnose(Sema &S,
97499750
break;
97509751

97519752
case OR_No_Viable_Function: {
9752-
auto Cands = FailedCandidateSet.CompleteCandidates(S, OCD_AllCandidates, Args);
9753+
auto Cands =
9754+
FailedCandidateSet->CompleteCandidates(S, OCD_AllCandidates, Args);
97539755
if (!S.RequireCompleteType(Kind.getLocation(),
97549756
DestType.getNonReferenceType(),
97559757
diag::err_typecheck_nonviable_condition_incomplete,
@@ -9759,13 +9761,13 @@ bool InitializationSequence::Diagnose(Sema &S,
97599761
<< OnlyArg->getType() << Args[0]->getSourceRange()
97609762
<< DestType.getNonReferenceType();
97619763

9762-
FailedCandidateSet.NoteCandidates(S, Args, Cands);
9764+
FailedCandidateSet->NoteCandidates(S, Args, Cands);
97639765
break;
97649766
}
97659767
case OR_Deleted: {
97669768
OverloadCandidateSet::iterator Best;
9767-
OverloadingResult Ovl
9768-
= FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
9769+
OverloadingResult Ovl =
9770+
FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
97699771

97709772
StringLiteral *Msg = Best->Function->getDeletedMessage();
97719773
S.Diag(Kind.getLocation(), diag::err_typecheck_deleted_function)
@@ -9949,7 +9951,7 @@ bool InitializationSequence::Diagnose(Sema &S,
99499951
// bad.
99509952
switch (FailedOverloadResult) {
99519953
case OR_Ambiguous:
9952-
FailedCandidateSet.NoteCandidates(
9954+
FailedCandidateSet->NoteCandidates(
99539955
PartialDiagnosticAt(Kind.getLocation(),
99549956
S.PDiag(diag::err_ovl_ambiguous_init)
99559957
<< DestType << ArgsRange),
@@ -10003,7 +10005,7 @@ bool InitializationSequence::Diagnose(Sema &S,
1000310005
break;
1000410006
}
1000510007

10006-
FailedCandidateSet.NoteCandidates(
10008+
FailedCandidateSet->NoteCandidates(
1000710009
PartialDiagnosticAt(
1000810010
Kind.getLocation(),
1000910011
S.PDiag(diag::err_ovl_no_viable_function_in_init)
@@ -10013,8 +10015,8 @@ bool InitializationSequence::Diagnose(Sema &S,
1001310015

1001410016
case OR_Deleted: {
1001510017
OverloadCandidateSet::iterator Best;
10016-
OverloadingResult Ovl
10017-
= FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
10018+
OverloadingResult Ovl =
10019+
FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
1001810020
if (Ovl != OR_Deleted) {
1001910021
S.Diag(Kind.getLocation(), diag::err_ovl_deleted_init)
1002010022
<< DestType << ArgsRange;
@@ -10093,8 +10095,8 @@ bool InitializationSequence::Diagnose(Sema &S,
1009310095
S.Diag(Kind.getLocation(), diag::err_selected_explicit_constructor)
1009410096
<< Args[0]->getSourceRange();
1009510097
OverloadCandidateSet::iterator Best;
10096-
OverloadingResult Ovl
10097-
= FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
10098+
OverloadingResult Ovl =
10099+
FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
1009810100
(void)Ovl;
1009910101
assert(Ovl == OR_Success && "Inconsistent overload resolution");
1010010102
CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);

clang/lib/Sema/SemaOverload.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,17 +1057,14 @@ bool OverloadCandidateSet::OperatorRewriteInfo::shouldAddReversed(
10571057

10581058
void OverloadCandidateSet::destroyCandidates() {
10591059
for (iterator i = begin(), e = end(); i != e; ++i) {
1060-
for (auto &C : i->Conversions)
1061-
C.~ImplicitConversionSequence();
1060+
delete[] i->Conversions.data();
10621061
if (!i->Viable && i->FailureKind == ovl_fail_bad_deduction)
10631062
i->DeductionFailure.Destroy();
10641063
}
10651064
}
10661065

10671066
void OverloadCandidateSet::clear(CandidateSetKind CSK) {
10681067
destroyCandidates();
1069-
SlabAllocator.Reset();
1070-
NumInlineBytesUsed = 0;
10711068
Candidates.clear();
10721069
Functions.clear();
10731070
Kind = CSK;
@@ -6983,7 +6980,7 @@ void Sema::AddOverloadCandidate(
69836980
Candidate.RewriteKind =
69846981
CandidateSet.getRewriteInfo().getRewriteKind(Function, PO);
69856982
Candidate.IsSurrogate = false;
6986-
Candidate.IsADLCandidate = IsADLCandidate;
6983+
Candidate.IsADLCandidate = static_cast<unsigned>(IsADLCandidate);
69876984
Candidate.IgnoreObjectArgument = false;
69886985
Candidate.ExplicitCallArguments = Args.size();
69896986

@@ -7815,7 +7812,7 @@ void Sema::AddTemplateOverloadCandidate(
78157812
Candidate.RewriteKind =
78167813
CandidateSet.getRewriteInfo().getRewriteKind(Candidate.Function, PO);
78177814
Candidate.IsSurrogate = false;
7818-
Candidate.IsADLCandidate = IsADLCandidate;
7815+
Candidate.IsADLCandidate = static_cast<unsigned>(IsADLCandidate);
78197816
// Ignore the object argument if there is one, since we don't have an object
78207817
// type.
78217818
Candidate.IgnoreObjectArgument =
@@ -14125,7 +14122,8 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
1412514122
return ExprError();
1412614123
return SemaRef.BuildResolvedCallExpr(
1412714124
Res.get(), FDecl, LParenLoc, Args, RParenLoc, ExecConfig,
14128-
/*IsExecConfig=*/false, (*Best)->IsADLCandidate);
14125+
/*IsExecConfig=*/false,
14126+
static_cast<CallExpr::ADLCallKind>((*Best)->IsADLCandidate));
1412914127
}
1413014128

1413114129
case OR_No_Viable_Function: {
@@ -14184,7 +14182,8 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
1418414182
return ExprError();
1418514183
return SemaRef.BuildResolvedCallExpr(
1418614184
Res.get(), FDecl, LParenLoc, Args, RParenLoc, ExecConfig,
14187-
/*IsExecConfig=*/false, (*Best)->IsADLCandidate);
14185+
/*IsExecConfig=*/false,
14186+
static_cast<CallExpr::ADLCallKind>((*Best)->IsADLCandidate));
1418814187
}
1418914188
}
1419014189

@@ -14491,7 +14490,8 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
1449114490
Args[0] = Input;
1449214491
CallExpr *TheCall = CXXOperatorCallExpr::Create(
1449314492
Context, Op, FnExpr.get(), ArgsArray, ResultTy, VK, OpLoc,
14494-
CurFPFeatureOverrides(), Best->IsADLCandidate);
14493+
CurFPFeatureOverrides(),
14494+
static_cast<CallExpr::ADLCallKind>(Best->IsADLCandidate));
1449514495

1449614496
if (CheckCallReturnType(FnDecl->getReturnType(), OpLoc, TheCall, FnDecl))
1449714497
return ExprError();
@@ -14909,7 +14909,8 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
1490914909
// members; CodeGen should take care not to emit the this pointer.
1491014910
TheCall = CXXOperatorCallExpr::Create(
1491114911
Context, ChosenOp, FnExpr.get(), Args, ResultTy, VK, OpLoc,
14912-
CurFPFeatureOverrides(), Best->IsADLCandidate);
14912+
CurFPFeatureOverrides(),
14913+
static_cast<CallExpr::ADLCallKind>(Best->IsADLCandidate));
1491314914

1491414915
if (const auto *Method = dyn_cast<CXXMethodDecl>(FnDecl);
1491514916
Method && Method->isImplicitObjectMemberFunction()) {

0 commit comments

Comments
 (0)