Skip to content

Commit 7e5821b

Browse files
authored
Reapply "[analyzer] Handle [[assume(cond)]] as __builtin_assume(cond)" (#129234)
This is the second attempt to bring initial support for [[assume()]] in the Clang Static Analyzer. The first attempt (#116462) was reverted in 2b9abf0 due to some weird failure in a libcxx test involving `#pragma clang loop vectorize(enable) interleave(enable)`. The failure could be reduced into: ```c++ template <class ExecutionPolicy> void transform(ExecutionPolicy) { #pragma clang loop vectorize(enable) interleave(enable) for (int i = 0; 0;) { // The DeclStmt of "i" would be added twice in the ThreadSafety analysis. // empty } } void entrypoint() { transform(1); } ``` As it turns out, the problem with the initial patch was this: ```c++ for (const auto *Attr : AS->getAttrs()) { if (const auto *AssumeAttr = dyn_cast<CXXAssumeAttr>(Attr)) { Expr *AssumeExpr = AssumeAttr->getAssumption(); if (!AssumeExpr->HasSideEffects(Ctx)) { childrenBuf.push_back(AssumeExpr); } } // Visit the actual children AST nodes. // For CXXAssumeAttrs, this is always a NullStmt. llvm::append_range(childrenBuf, AS->children()); // <--- This was not meant to be part of the "for" loop. children = childrenBuf; } return; ``` The solution was simple. Just hoist it from the loop. I also had a closer look at `CFGBuilder::VisitAttributedStmt`, where I also spotted another bug. We would have added the CFG blocks twice if the AttributedStmt would have both the `[[fallthrough]]` and the `[[assume()]]` attributes. With my fix, it will only once add the blocks. Added a regression test for this. Co-authored-by: Vinay Deshmukh <vinay_deshmukh AT outlook DOT com>
1 parent a28daa7 commit 7e5821b

File tree

8 files changed

+242
-20
lines changed

8 files changed

+242
-20
lines changed

clang/include/clang/AST/AttrIterator.h

+12
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "clang/Basic/LLVM.h"
1717
#include "llvm/ADT/ADL.h"
1818
#include "llvm/ADT/SmallVector.h"
19+
#include "llvm/ADT/iterator_range.h"
1920
#include "llvm/Support/Casting.h"
2021
#include <cassert>
2122
#include <cstddef>
@@ -124,6 +125,17 @@ inline auto *getSpecificAttr(const Container &container) {
124125
return It != specific_attr_end<IterTy>(container) ? *It : nullptr;
125126
}
126127

128+
template <typename SpecificAttr, typename Container>
129+
inline auto getSpecificAttrs(const Container &container) {
130+
using ValueTy = llvm::detail::ValueOfRange<Container>;
131+
using ValuePointeeTy = std::remove_pointer_t<ValueTy>;
132+
using IterTy = std::conditional_t<std::is_const_v<ValuePointeeTy>,
133+
const SpecificAttr, SpecificAttr>;
134+
auto Begin = specific_attr_begin<IterTy>(container);
135+
auto End = specific_attr_end<IterTy>(container);
136+
return llvm::make_range(Begin, End);
137+
}
138+
127139
} // namespace clang
128140

129141
#endif // LLVM_CLANG_AST_ATTRITERATOR_H

clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h

+4
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,10 @@ class ExprEngine {
498498
void VisitInitListExpr(const InitListExpr *E, ExplodedNode *Pred,
499499
ExplodedNodeSet &Dst);
500500

501+
/// VisitAttributedStmt - Transfer function logic for AttributedStmt
502+
void VisitAttributedStmt(const AttributedStmt *A, ExplodedNode *Pred,
503+
ExplodedNodeSet &Dst);
504+
501505
/// VisitLogicalExpr - Transfer function logic for '&&', '||'
502506
void VisitLogicalExpr(const BinaryOperator* B, ExplodedNode *Pred,
503507
ExplodedNodeSet &Dst);

clang/lib/Analysis/CFG.cpp

+57-14
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ class reverse_children {
433433
ArrayRef<Stmt *> children;
434434

435435
public:
436-
reverse_children(Stmt *S);
436+
reverse_children(Stmt *S, ASTContext &Ctx);
437437

438438
using iterator = ArrayRef<Stmt *>::reverse_iterator;
439439

@@ -443,21 +443,44 @@ class reverse_children {
443443

444444
} // namespace
445445

446-
reverse_children::reverse_children(Stmt *S) {
446+
reverse_children::reverse_children(Stmt *S, ASTContext &Ctx) {
447447
if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
448448
children = CE->getRawSubExprs();
449449
return;
450450
}
451+
451452
switch (S->getStmtClass()) {
452-
// Note: Fill in this switch with more cases we want to optimize.
453-
case Stmt::InitListExprClass: {
454-
InitListExpr *IE = cast<InitListExpr>(S);
455-
children = llvm::ArrayRef(reinterpret_cast<Stmt **>(IE->getInits()),
456-
IE->getNumInits());
457-
return;
453+
// Note: Fill in this switch with more cases we want to optimize.
454+
case Stmt::InitListExprClass: {
455+
InitListExpr *IE = cast<InitListExpr>(S);
456+
children = llvm::ArrayRef(reinterpret_cast<Stmt **>(IE->getInits()),
457+
IE->getNumInits());
458+
return;
459+
}
460+
461+
case Stmt::AttributedStmtClass: {
462+
// For an attributed stmt, the "children()" returns only the NullStmt
463+
// (;) but semantically the "children" are supposed to be the
464+
// expressions _within_ i.e. the two square brackets i.e. [[ HERE ]]
465+
// so we add the subexpressions first, _then_ add the "children"
466+
auto *AS = cast<AttributedStmt>(S);
467+
for (const auto *Attr : AS->getAttrs()) {
468+
if (const auto *AssumeAttr = dyn_cast<CXXAssumeAttr>(Attr)) {
469+
Expr *AssumeExpr = AssumeAttr->getAssumption();
470+
if (!AssumeExpr->HasSideEffects(Ctx)) {
471+
childrenBuf.push_back(AssumeExpr);
472+
}
473+
}
458474
}
459-
default:
460-
break;
475+
476+
// Visit the actual children AST nodes.
477+
// For CXXAssumeAttrs, this is always a NullStmt.
478+
llvm::append_range(childrenBuf, AS->children());
479+
children = childrenBuf;
480+
return;
481+
}
482+
default:
483+
break;
461484
}
462485

463486
// Default case for all other statements.
@@ -2433,7 +2456,7 @@ CFGBlock *CFGBuilder::VisitChildren(Stmt *S) {
24332456

24342457
// Visit the children in their reverse order so that they appear in
24352458
// left-to-right (natural) order in the CFG.
2436-
reverse_children RChildren(S);
2459+
reverse_children RChildren(S, *Context);
24372460
for (Stmt *Child : RChildren) {
24382461
if (Child)
24392462
if (CFGBlock *R = Visit(Child))
@@ -2449,7 +2472,7 @@ CFGBlock *CFGBuilder::VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc) {
24492472
}
24502473
CFGBlock *B = Block;
24512474

2452-
reverse_children RChildren(ILE);
2475+
reverse_children RChildren(ILE, *Context);
24532476
for (Stmt *Child : RChildren) {
24542477
if (!Child)
24552478
continue;
@@ -2484,6 +2507,14 @@ static bool isFallthroughStatement(const AttributedStmt *A) {
24842507
return isFallthrough;
24852508
}
24862509

2510+
static bool isCXXAssumeAttr(const AttributedStmt *A) {
2511+
bool hasAssumeAttr = hasSpecificAttr<CXXAssumeAttr>(A->getAttrs());
2512+
2513+
assert((!hasAssumeAttr || isa<NullStmt>(A->getSubStmt())) &&
2514+
"expected [[assume]] not to have children");
2515+
return hasAssumeAttr;
2516+
}
2517+
24872518
CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
24882519
AddStmtChoice asc) {
24892520
// AttributedStmts for [[likely]] can have arbitrary statements as children,
@@ -2494,7 +2525,8 @@ CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
24942525
// So only add the AttributedStmt for FallThrough, which has CFG effects and
24952526
// also no children, and omit the others. None of the other current StmtAttrs
24962527
// have semantic meaning for the CFG.
2497-
if (isFallthroughStatement(A) && asc.alwaysAdd(*this, A)) {
2528+
bool isInterestingAttribute = isFallthroughStatement(A) || isCXXAssumeAttr(A);
2529+
if (isInterestingAttribute && asc.alwaysAdd(*this, A)) {
24982530
autoCreateBlock();
24992531
appendStmt(Block, A);
25002532
}
@@ -2700,6 +2732,16 @@ static bool CanThrow(Expr *E, ASTContext &Ctx) {
27002732
return true;
27012733
}
27022734

2735+
static bool isBuiltinAssumeWithSideEffects(const ASTContext &Ctx,
2736+
const CallExpr *CE) {
2737+
unsigned BuiltinID = CE->getBuiltinCallee();
2738+
if (BuiltinID != Builtin::BI__assume &&
2739+
BuiltinID != Builtin::BI__builtin_assume)
2740+
return false;
2741+
2742+
return CE->getArg(0)->HasSideEffects(Ctx);
2743+
}
2744+
27032745
CFGBlock *CFGBuilder::VisitCallExpr(CallExpr *C, AddStmtChoice asc) {
27042746
// Compute the callee type.
27052747
QualType calleeType = C->getCallee()->getType();
@@ -2738,7 +2780,8 @@ CFGBlock *CFGBuilder::VisitCallExpr(CallExpr *C, AddStmtChoice asc) {
27382780
NoReturn = true;
27392781
if (FD->hasAttr<NoThrowAttr>())
27402782
AddEHEdge = false;
2741-
if (FD->getBuiltinID() == Builtin::BI__builtin_object_size ||
2783+
if (isBuiltinAssumeWithSideEffects(FD->getASTContext(), C) ||
2784+
FD->getBuiltinID() == Builtin::BI__builtin_object_size ||
27422785
FD->getBuiltinID() == Builtin::BI__builtin_dynamic_object_size)
27432786
OmitArguments = true;
27442787
}

clang/lib/StaticAnalyzer/Core/ExprEngine.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -1951,7 +1951,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
19511951
// to be explicitly evaluated.
19521952
case Stmt::PredefinedExprClass:
19531953
case Stmt::AddrLabelExprClass:
1954-
case Stmt::AttributedStmtClass:
19551954
case Stmt::IntegerLiteralClass:
19561955
case Stmt::FixedPointLiteralClass:
19571956
case Stmt::CharacterLiteralClass:
@@ -1982,6 +1981,13 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
19821981
break;
19831982
}
19841983

1984+
case Stmt::AttributedStmtClass: {
1985+
Bldr.takeNodes(Pred);
1986+
VisitAttributedStmt(cast<AttributedStmt>(S), Pred, Dst);
1987+
Bldr.addNodes(Dst);
1988+
break;
1989+
}
1990+
19851991
case Stmt::CXXDefaultArgExprClass:
19861992
case Stmt::CXXDefaultInitExprClass: {
19871993
Bldr.takeNodes(Pred);

clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -797,16 +797,16 @@ void ExprEngine::VisitGuardedExpr(const Expr *Ex,
797797
// Find the predecessor block.
798798
ProgramStateRef SrcState = state;
799799
for (const ExplodedNode *N = Pred ; N ; N = *N->pred_begin()) {
800-
ProgramPoint PP = N->getLocation();
801-
if (PP.getAs<PreStmtPurgeDeadSymbols>() || PP.getAs<BlockEntrance>()) {
800+
auto Edge = N->getLocationAs<BlockEdge>();
801+
if (!Edge.has_value()) {
802802
// If the state N has multiple predecessors P, it means that successors
803803
// of P are all equivalent.
804804
// In turn, that means that all nodes at P are equivalent in terms
805805
// of observable behavior at N, and we can follow any of them.
806806
// FIXME: a more robust solution which does not walk up the tree.
807807
continue;
808808
}
809-
SrcBlock = PP.castAs<BlockEdge>().getSrc();
809+
SrcBlock = Edge->getSrc();
810810
SrcState = N->getState();
811811
break;
812812
}

clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp

+19-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "clang/AST/AttrIterator.h"
1314
#include "clang/AST/DeclCXX.h"
1415
#include "clang/AST/ParentMap.h"
1516
#include "clang/AST/StmtCXX.h"
@@ -1205,4 +1206,21 @@ void ExprEngine::VisitLambdaExpr(const LambdaExpr *LE, ExplodedNode *Pred,
12051206

12061207
// FIXME: Move all post/pre visits to ::Visit().
12071208
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, LE, *this);
1208-
}
1209+
}
1210+
1211+
void ExprEngine::VisitAttributedStmt(const AttributedStmt *A,
1212+
ExplodedNode *Pred, ExplodedNodeSet &Dst) {
1213+
ExplodedNodeSet CheckerPreStmt;
1214+
getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, A, *this);
1215+
1216+
ExplodedNodeSet EvalSet;
1217+
StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
1218+
1219+
for (const auto *Attr : getSpecificAttrs<CXXAssumeAttr>(A->getAttrs())) {
1220+
for (ExplodedNode *N : CheckerPreStmt) {
1221+
Visit(Attr->getAssumption(), N, EvalSet);
1222+
}
1223+
}
1224+
1225+
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, A, *this);
1226+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// RUN: %clang_analyze_cc1 -std=c++23 -triple x86_64-pc-linux-gnu \
2+
// RUN: -analyzer-checker=core,debug.ExprInspection -verify %s
3+
4+
template <typename T> void clang_analyzer_dump(T);
5+
template <typename T> void clang_analyzer_value(T);
6+
7+
int ternary_in_builtin_assume(int a, int b) {
8+
__builtin_assume(a > 10 ? b == 4 : b == 10);
9+
10+
clang_analyzer_value(a);
11+
// expected-warning@-1 {{[-2147483648, 10]}}
12+
// expected-warning@-2 {{[11, 2147483647]}}
13+
14+
clang_analyzer_dump(b); // expected-warning{{4}} expected-warning{{10}}
15+
16+
if (a > 20) {
17+
clang_analyzer_dump(b + 100); // expected-warning {{104}}
18+
return 2;
19+
}
20+
if (a > 10) {
21+
clang_analyzer_dump(b + 200); // expected-warning {{204}}
22+
return 1;
23+
}
24+
clang_analyzer_dump(b + 300); // expected-warning {{310}}
25+
return 0;
26+
}
27+
28+
// From: https://github.com/llvm/llvm-project/pull/116462#issuecomment-2517853226
29+
int ternary_in_assume(int a, int b) {
30+
// FIXME notes
31+
// Currently, if this test is run without the core.builtin.Builtin checker, the above function with the __builtin_assume behaves identically to the following test
32+
// i.e. calls to `clang_analyzer_dump` result in "extraneous" prints of the SVal(s) `reg<int b> ...`
33+
// as opposed to 4 or 10
34+
// which likely implies the Program State(s) did not get narrowed.
35+
// A new checker is likely needed to be implemented to properly handle the expressions within `[[assume]]` to eliminate the states where `b` is not narrowed.
36+
37+
[[assume(a > 10 ? b == 4 : b == 10)]];
38+
clang_analyzer_value(a);
39+
// expected-warning@-1 {{[-2147483648, 10]}}
40+
// expected-warning@-2 {{[11, 2147483647]}}
41+
42+
clang_analyzer_dump(b); // expected-warning {{4}} expected-warning {{10}}
43+
// expected-warning-re@-1 {{reg_${{[0-9]+}}<int b>}} FIXME: We shouldn't have this dump.
44+
45+
if (a > 20) {
46+
clang_analyzer_dump(b + 100); // expected-warning {{104}}
47+
// expected-warning-re@-1 {{(reg_${{[0-9]+}}<int b>) + 100}} FIXME: We shouldn't have this dump.
48+
return 2;
49+
}
50+
if (a > 10) {
51+
clang_analyzer_dump(b + 200); // expected-warning {{204}}
52+
// expected-warning-re@-1 {{(reg_${{[0-9]+}}<int b>) + 200}} FIXME: We shouldn't have this dump.
53+
return 1;
54+
}
55+
clang_analyzer_dump(b + 300); // expected-warning {{310}}
56+
// expected-warning-re@-1 {{(reg_${{[0-9]+}}<int b>) + 300}} FIXME: We shouldn't have this dump.
57+
return 0;
58+
}
59+
60+
int assume_and_fallthrough_at_the_same_attrstmt(int a, int b) {
61+
[[assume(a == 2)]];
62+
clang_analyzer_dump(a); // expected-warning {{2 S32b}}
63+
// expected-warning-re@-1 {{reg_${{[0-9]+}}<int a>}} FIXME: We shouldn't have this dump.
64+
switch (a) {
65+
case 2:
66+
[[fallthrough, assume(b == 30)]];
67+
case 4: {
68+
clang_analyzer_dump(b); // expected-warning {{30 S32b}}
69+
// expected-warning-re@-1 {{reg_${{[0-9]+}}<int b>}} FIXME: We shouldn't have this dump.
70+
return b;
71+
}
72+
}
73+
// This code should be unreachable.
74+
[[assume(false)]]; // This should definitely make it so.
75+
clang_analyzer_dump(33); // expected-warning {{33 S32b}}
76+
return 0;
77+
}

clang/test/Analysis/out-of-bounds-new.cpp

+63-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -analyzer-checker=unix,core,security.ArrayBound -verify %s
1+
// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -verify %s \
2+
// RUN: -triple=x86_64-unknown-linux-gnu \
3+
// RUN: -analyzer-checker=unix,core,security.ArrayBound,debug.ExprInspection
4+
5+
void clang_analyzer_eval(bool);
6+
void clang_analyzer_value(int);
7+
void clang_analyzer_dump(int);
28

39
// Tests doing an out-of-bounds access after the end of an array using:
410
// - constant integer index
@@ -180,3 +186,59 @@ int test_reference_that_might_be_after_the_end(int idx) {
180186
return ref;
181187
}
182188

189+
// From: https://github.com/llvm/llvm-project/issues/100762
190+
extern int arrOf10[10];
191+
void using_builtin(int x) {
192+
__builtin_assume(x > 101); // CallExpr
193+
arrOf10[x] = 404; // expected-warning {{Out of bound access to memory}}
194+
}
195+
196+
void using_assume_attr(int ax) {
197+
[[assume(ax > 100)]]; // NullStmt with an "assume" attribute.
198+
arrOf10[ax] = 405; // expected-warning {{Out of bound access to memory}}
199+
}
200+
201+
void using_many_assume_attr(int yx) {
202+
[[assume(yx > 104), assume(yx > 200), assume(yx < 300)]]; // NullStmt with an attribute
203+
arrOf10[yx] = 406; // expected-warning{{Out of bound access to memory}}
204+
}
205+
206+
int using_assume_attr_has_no_sideeffects(int y) {
207+
int orig_y = y;
208+
clang_analyzer_value(y); // expected-warning {{32s:{ [-2147483648, 2147483647] }}}
209+
clang_analyzer_value(orig_y); // expected-warning {{32s:{ [-2147483648, 2147483647] }}}
210+
clang_analyzer_dump(y); // expected-warning-re {{{{^}}reg_${{[0-9]+}}<int y> [debug.ExprInspection]{{$}}}}
211+
clang_analyzer_dump(orig_y); // expected-warning-re {{{{^}}reg_${{[0-9]+}}<int y> [debug.ExprInspection]{{$}}}}
212+
213+
// We should not apply sideeffects of the argument of [[assume(...)]].
214+
// "y" should not get incremented;
215+
[[assume(++y == 43)]]; // expected-warning {{assumption is ignored because it contains (potential) side-effects}}
216+
217+
clang_analyzer_dump(y); // expected-warning-re {{{{^}}reg_${{[0-9]+}}<int y> [debug.ExprInspection]{{$}}}}
218+
clang_analyzer_dump(orig_y); // expected-warning-re {{{{^}}reg_${{[0-9]+}}<int y> [debug.ExprInspection]{{$}}}}
219+
clang_analyzer_value(y); // expected-warning {{32s:{ [-2147483648, 2147483647] }}}
220+
clang_analyzer_value(orig_y); // expected-warning {{32s:{ [-2147483648, 2147483647] }}}
221+
clang_analyzer_eval(y == orig_y); // expected-warning {{TRUE}} Good.
222+
223+
return y;
224+
}
225+
226+
int using_builtin_assume_has_no_sideeffects(int y) {
227+
int orig_y = y;
228+
clang_analyzer_value(y); // expected-warning {{32s:{ [-2147483648, 2147483647] }}}
229+
clang_analyzer_value(orig_y); // expected-warning {{32s:{ [-2147483648, 2147483647] }}}
230+
clang_analyzer_dump(y); // expected-warning-re {{{{^}}reg_${{[0-9]+}}<int y> [debug.ExprInspection]{{$}}}}
231+
clang_analyzer_dump(orig_y); // expected-warning-re {{{{^}}reg_${{[0-9]+}}<int y> [debug.ExprInspection]{{$}}}}
232+
233+
// We should not apply sideeffects of the argument of __builtin_assume(...)
234+
// "u" should not get incremented;
235+
__builtin_assume(++y == 43); // expected-warning {{assumption is ignored because it contains (potential) side-effects}}
236+
237+
clang_analyzer_dump(y); // expected-warning-re {{{{^}}reg_${{[0-9]+}}<int y> [debug.ExprInspection]{{$}}}}
238+
clang_analyzer_dump(orig_y); // expected-warning-re {{{{^}}reg_${{[0-9]+}}<int y> [debug.ExprInspection]{{$}}}}
239+
clang_analyzer_value(y); // expected-warning {{32s:{ [-2147483648, 2147483647] }}}
240+
clang_analyzer_value(orig_y); // expected-warning {{32s:{ [-2147483648, 2147483647] }}}
241+
clang_analyzer_eval(y == orig_y); // expected-warning {{TRUE}} Good.
242+
243+
return y;
244+
}

0 commit comments

Comments
 (0)