-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Fix incorrect array initialization with string literal (fixes #112189) #156846
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-clang Author: None (awson) ChangesFollowing the suggestion of @zygoloid Done as a separate static function and not inlined directly into the I probably should have done this for the dependent case only, but the function is buried deep in the call stack and I didn't bother. Full diff: https://github.com/llvm/llvm-project/pull/156846.diff 2 Files Affected:
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index c97129336736b..b1cffe062d0a7 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -168,9 +168,85 @@ bool Sema::IsStringInit(Expr *Init, const ArrayType *AT) {
return ::IsStringInit(Init, AT, Context) == SIF_None;
}
+static StringLiteral *CloneStringLiteral(const StringLiteral *SL,
+ ASTContext &C) {
+ return StringLiteral::Create(C, SL->getString(), SL->getKind(),
+ SL->isPascal(), SL->getType(),
+ SL->getBeginLoc());
+}
+
+// Exactly follow `IgnoreParensSingleStep` (`AST/IgnoreExpr.h`)
+// We only clone those subexpressions which `IgnoreParensSingleStep` drills down
+// to.
+static Expr *CloneDrilled(Expr *E, ASTContext &C) {
+ if (auto *SL = dyn_cast<StringLiteral>(E)) {
+ return CloneStringLiteral(SL, C);
+ }
+
+ if (auto *PE = dyn_cast<ParenExpr>(E)) {
+ return new (C) ParenExpr(PE->getBeginLoc(), PE->getEndLoc(),
+ CloneDrilled(PE->getSubExpr(), C));
+ }
+
+ if (auto *UO = dyn_cast<UnaryOperator>(E)) {
+ if (UO->getOpcode() == UO_Extension) {
+ return UnaryOperator::Create(
+ C, CloneDrilled(UO->getSubExpr(), C), UO_Extension, UO->getType(),
+ UO->getValueKind(), UO->getObjectKind(), UO->getBeginLoc(),
+ UO->canOverflow(), UO->getFPOptionsOverride());
+ }
+ }
+
+ else if (auto *GSE = dyn_cast<GenericSelectionExpr>(E)) {
+ if (!GSE->isResultDependent()) {
+ ArrayRef<Expr *> GSEAEs = GSE->getAssocExprs();
+ Expr **NewGSEAEs = new (C) Expr *[GSEAEs.size()];
+ std::copy(GSEAEs.begin(), GSEAEs.end(), NewGSEAEs);
+ NewGSEAEs[GSE->getResultIndex()] = CloneDrilled(GSE->getResultExpr(), C);
+
+#define CREATE_GSE(ctrl) \
+ GenericSelectionExpr::Create( \
+ C, GSE->getGenericLoc(), ctrl, GSE->getAssocTypeSourceInfos(), \
+ ArrayRef<Expr *>(NewGSEAEs, GSEAEs.size()), GSE->getDefaultLoc(), \
+ GSE->getRParenLoc(), GSE->containsUnexpandedParameterPack(), \
+ GSE->getResultIndex())
+
+ return GSE->isExprPredicate() ? CREATE_GSE(GSE->getControllingExpr())
+ : CREATE_GSE(GSE->getControllingType());
+#undef CREATE_GSE
+ }
+ }
+
+ else if (auto *CE = dyn_cast<ChooseExpr>(E)) {
+ if (!CE->isConditionDependent()) {
+ // Drills to `CE->getChosenSubExpr()`
+ const bool isCondTrue = CE->isConditionTrue();
+ return new (C)
+ ChooseExpr(CE->getBeginLoc(), CE->getCond(),
+ isCondTrue ? CloneDrilled(CE->getLHS(), C) : CE->getLHS(),
+ isCondTrue ? CE->getRHS() : CloneDrilled(CE->getRHS(), C),
+ CE->getType(), CE->getValueKind(), CE->getObjectKind(),
+ CE->getRParenLoc(), CE->isConditionTrue());
+ }
+ }
+
+ else if (auto *PE = dyn_cast<PredefinedExpr>(E)) {
+ if (PE->isTransparent() && PE->getFunctionName()) {
+ return PredefinedExpr::Create(
+ C, PE->getLocation(), PE->getType(), PE->getIdentKind(),
+ PE->isTransparent(), CloneStringLiteral(PE->getFunctionName(), C));
+ }
+ }
+
+ return E;
+}
+
/// Update the type of a string literal, including any surrounding parentheses,
/// to match the type of the object which it is initializing.
-static void updateStringLiteralType(Expr *E, QualType Ty) {
+static Expr *updateStringLiteralType(Expr *E, QualType Ty, Sema &S) {
+ Expr *ENew = CloneDrilled(E, S.Context);
+ E = ENew;
+
while (true) {
E->setType(Ty);
E->setValueKind(VK_PRValue);
@@ -178,6 +254,7 @@ static void updateStringLiteralType(Expr *E, QualType Ty) {
break;
E = IgnoreParensSingleStep(E);
}
+ return ENew;
}
/// Fix a compound literal initializing an array so it's correctly marked
@@ -209,7 +286,7 @@ static bool initializingConstexprVariable(const InitializedEntity &Entity) {
static void CheckC23ConstexprInitStringLiteral(const StringLiteral *SE,
Sema &SemaRef, QualType &TT);
-static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
+static Expr *CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
Sema &S, const InitializedEntity &Entity,
bool CheckC23ConstexprInit = false) {
// Get the length of the string as parsed.
@@ -228,8 +305,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
// Return a new array type (C99 6.7.8p22).
DeclT = S.Context.getConstantArrayType(
IAT->getElementType(), ConstVal, nullptr, ArraySizeModifier::Normal, 0);
- updateStringLiteralType(Str, DeclT);
- return;
+ return updateStringLiteralType(Str, DeclT, S);
}
const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
@@ -302,7 +378,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
// something like:
// char x[1] = "foo";
// then this will set the string literal's type to char[1].
- updateStringLiteralType(Str, DeclT);
+ return updateStringLiteralType(Str, DeclT, S);
}
void emitUninitializedExplicitInitFields(Sema &S, const RecordDecl *R) {
@@ -1608,10 +1684,13 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) {
// FIXME: Should we do this checking in verify-only mode?
- if (!VerifyOnly)
- CheckStringInit(expr, ElemType, arrayType, SemaRef, Entity,
- SemaRef.getLangOpts().C23 &&
- initializingConstexprVariable(Entity));
+ if (!VerifyOnly) {
+ expr = CheckStringInit(
+ expr, ElemType, arrayType, SemaRef, Entity,
+ SemaRef.getLangOpts().C23 &&
+ initializingConstexprVariable(Entity));
+ IList->setInit(Index, expr);
+ }
if (StructuredList)
UpdateStructuredListElement(StructuredList, StructuredIndex, expr);
++Index;
@@ -2121,10 +2200,13 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
// because doing so would involve allocating one character
// constant for each string.
// FIXME: Should we do these checks in verify-only mode too?
- if (!VerifyOnly)
- CheckStringInit(
- IList->getInit(Index), DeclType, arrayType, SemaRef, Entity,
- SemaRef.getLangOpts().C23 && initializingConstexprVariable(Entity));
+ if (!VerifyOnly) {
+ IList->setInit(
+ Index, CheckStringInit(IList->getInit(Index), DeclType, arrayType,
+ SemaRef, Entity,
+ SemaRef.getLangOpts().C23 &&
+ initializingConstexprVariable(Entity)));
+ }
if (StructuredList) {
UpdateStructuredListElement(StructuredList, StructuredIndex,
IList->getInit(Index));
@@ -8421,7 +8503,8 @@ ExprResult InitializationSequence::Perform(Sema &S,
case SK_StringInit: {
QualType Ty = Step->Type;
bool UpdateType = ResultType && Entity.getType()->isIncompleteArrayType();
- CheckStringInit(CurInit.get(), UpdateType ? *ResultType : Ty,
+ CurInit = CheckStringInit(
+ CurInit.get(), UpdateType ? *ResultType : Ty,
S.Context.getAsArrayType(Ty), S, Entity,
S.getLangOpts().C23 &&
initializingConstexprVariable(Entity));
diff --git a/clang/test/SemaCXX/GH112189.cpp b/clang/test/SemaCXX/GH112189.cpp
new file mode 100644
index 0000000000000..036fd8ea83c0e
--- /dev/null
+++ b/clang/test/SemaCXX/GH112189.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -verify %s
+// expected-no-diagnostics
+
+template<unsigned int SPACE>
+char foo_choose() {
+ char buffer[SPACE] {__builtin_choose_expr(6, "foo", "boo")};
+ return buffer[0];
+}
+
+int boro_choose()
+{
+ int r = foo_choose<10>();
+ r += foo_choose<100>();
+ return r + foo_choose<4>();
+}
+
+template<unsigned int SPACE>
+char foo_gen_ext() {
+ char buffer[SPACE] {__extension__ (_Generic(0, int: (__extension__ "foo" )))};
+ return buffer[0];
+}
+
+int boro_gen_ext()
+{
+ int r = foo_gen_ext<10>();
+ r += foo_gen_ext<100>();
+ return r + foo_gen_ext<4>();
+}
+
+template<unsigned int SPACE>
+char foo_paren_predef() {
+ char buffer[SPACE] {(((__FILE__)))};
+ return buffer[0];
+}
+
+int boro_paren_predef()
+{
+ int r = foo_paren_predef<200000>();
+ r += foo_paren_predef<300000>();
+ return r + foo_paren_predef<100000>();
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please update the subject of the PR to better describe what the PR is actually fixing.
Thanks for working on this. I have some concerns that we do that transformation even in non template contexts, it seems a fairly big hammer. Can you try to find ways to limit how often the cloning is done? id you explore the other suggestion made by @zygoloid at all ? (to do it in TreeTransform) |
clang/lib/Sema/SemaInit.cpp
Outdated
#define CREATE_GSE(ctrl) \ | ||
GenericSelectionExpr::Create( \ | ||
C, GSE->getGenericLoc(), ctrl, GSE->getAssocTypeSourceInfos(), \ | ||
ArrayRef<Expr *>(NewGSEAEs, GSEAEs.size()), GSE->getDefaultLoc(), \ | ||
GSE->getRParenLoc(), GSE->containsUnexpandedParameterPack(), \ | ||
GSE->getResultIndex()) | ||
|
||
return GSE->isExprPredicate() ? CREATE_GSE(GSE->getControllingExpr()) | ||
: CREATE_GSE(GSE->getControllingType()); | ||
#undef CREATE_GSE |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lets use a lambda or a function here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
clang/lib/Sema/SemaInit.cpp
Outdated
// Exactly follow `IgnoreParensSingleStep` (`AST/IgnoreExpr.h`) | ||
// We only clone those subexpressions which `IgnoreParensSingleStep` drills down | ||
// to. | ||
static Expr *CloneDrilled(Expr *E, ASTContext &C) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we find a better name? CloneEnclosedStringLiteral
or smth?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps, but it is a string literal only in the innermost leaf, otherwise it may be parens, of generic or predefined or choose.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed to CloneWithRecurseToInnermostSL
. I thing it much better reflects what is going on here.
clang/lib/Sema/SemaInit.cpp
Outdated
Expr *ENew = CloneDrilled(E, S.Context); | ||
E = ENew; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Expr *ENew = CloneDrilled(E, S.Context); | |
E = ENew; | |
E = CloneDrilled(E, S.Context); |
break; | ||
E = IgnoreParensSingleStep(E); | ||
} | ||
return ENew; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That should be E, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nope.
We need to keep ENew
for return.
E
drills down to innermost string literal.
I'm (almost) absolutely new to the clang codebase, and the second option seemed much more approachable to me to implement. I've looked into TreeTransform, but, while understanding (in principle) its purpose, I don't quite understand what should I do and where. But well, I'll look if I can limit the cloning. |
…es it modifies. Done as a separate static function and not inlined directly into the `updateStringLiteralType` function for maintainability.
@zygoloid, should I pursue the idea to restrict cloning of subexpressions in My current understanding is that the dependence information is completely lost in the |
Following the suggestion of @zygoloid
updateStringLiteralType
now recursively clones subexpressions which types it modifies.Done as a separate static function and not inlined directly into the
updateStringLiteralType
function for maintainability.I probably should have done this for the dependent case only, but the function is buried deep in the call stack and I didn't bother.
Fixes #112189