Skip to content

Commit 89da344

Browse files
[analyzer] Handle [[assume(cond)]] as __builtin_assume(cond) (#116462)
Resolves #100762 Gist of the change: 1. All the symbol analysis, constraint manager and expression parsing logic was already present, but the previous code didn't "visit" the expressions within `assume()` by parsing those expressions, all of the code "just works" by evaluating the SVals, and hence leaning on the same logic that makes the code with `__builtin_assume` work 2. "Ignore" an expression from adding in CFG if it has side-effects ( similar to CGStmt.cpp (todo add link)) 3. Add additional test case for ternary operator handling and modify CFG.cpp's VisitGuardedExpr code for `continue`-ing if the `ProgramPoint` is a `StmtPoint` --------- Co-authored-by: Balazs Benics <[email protected]>
1 parent eb812d2 commit 89da344

File tree

8 files changed

+218
-25
lines changed

8 files changed

+218
-25
lines changed

clang/include/clang/AST/AttrIterator.h

+12
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "clang/Basic/LLVM.h"
1717
#include "llvm/ADT/ADL.h"
1818
#include "llvm/ADT/SmallVector.h"
19+
#include "llvm/ADT/iterator_range.h"
1920
#include "llvm/Support/Casting.h"
2021
#include <cassert>
2122
#include <cstddef>
@@ -124,6 +125,17 @@ inline auto *getSpecificAttr(const Container &container) {
124125
return It != specific_attr_end<IterTy>(container) ? *It : nullptr;
125126
}
126127

128+
template <typename SpecificAttr, typename Container>
129+
inline auto getSpecificAttrs(const Container &container) {
130+
using ValueTy = llvm::detail::ValueOfRange<Container>;
131+
using ValuePointeeTy = std::remove_pointer_t<ValueTy>;
132+
using IterTy = std::conditional_t<std::is_const_v<ValuePointeeTy>,
133+
const SpecificAttr, SpecificAttr>;
134+
auto Begin = specific_attr_begin<IterTy>(container);
135+
auto End = specific_attr_end<IterTy>(container);
136+
return llvm::make_range(Begin, End);
137+
}
138+
127139
} // namespace clang
128140

129141
#endif // LLVM_CLANG_AST_ATTRITERATOR_H

clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h

+4
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,10 @@ class ExprEngine {
498498
void VisitInitListExpr(const InitListExpr *E, ExplodedNode *Pred,
499499
ExplodedNodeSet &Dst);
500500

501+
/// VisitAttributedStmt - Transfer function logic for AttributedStmt
502+
void VisitAttributedStmt(const AttributedStmt *A, ExplodedNode *Pred,
503+
ExplodedNodeSet &Dst);
504+
501505
/// VisitLogicalExpr - Transfer function logic for '&&', '||'
502506
void VisitLogicalExpr(const BinaryOperator* B, ExplodedNode *Pred,
503507
ExplodedNodeSet &Dst);

clang/lib/Analysis/CFG.cpp

+52-20
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ class reverse_children {
433433
ArrayRef<Stmt *> children;
434434

435435
public:
436-
reverse_children(Stmt *S);
436+
reverse_children(Stmt *S, ASTContext &Ctx);
437437

438438
using iterator = ArrayRef<Stmt *>::reverse_iterator;
439439

@@ -443,28 +443,47 @@ class reverse_children {
443443

444444
} // namespace
445445

446-
reverse_children::reverse_children(Stmt *S) {
447-
if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
448-
children = CE->getRawSubExprs();
446+
reverse_children::reverse_children(Stmt *S, ASTContext &Ctx) {
447+
switch (S->getStmtClass()) {
448+
case Stmt::CallExprClass: {
449+
children = cast<CallExpr>(S)->getRawSubExprs();
449450
return;
450451
}
451-
switch (S->getStmtClass()) {
452-
// Note: Fill in this switch with more cases we want to optimize.
453-
case Stmt::InitListExprClass: {
454-
InitListExpr *IE = cast<InitListExpr>(S);
455-
children = llvm::ArrayRef(reinterpret_cast<Stmt **>(IE->getInits()),
456-
IE->getNumInits());
457-
return;
458-
}
459-
default:
460-
break;
452+
453+
// Note: Fill in this switch with more cases we want to optimize.
454+
case Stmt::InitListExprClass: {
455+
InitListExpr *IE = cast<InitListExpr>(S);
456+
children = llvm::ArrayRef(reinterpret_cast<Stmt **>(IE->getInits()),
457+
IE->getNumInits());
458+
return;
461459
}
460+
case Stmt::AttributedStmtClass: {
461+
auto *AS = cast<AttributedStmt>(S);
462462

463-
// Default case for all other statements.
464-
llvm::append_range(childrenBuf, S->children());
463+
// for an attributed stmt, the "children()" returns only the NullStmt
464+
// (;) but semantically the "children" are supposed to be the
465+
// expressions _within_ i.e. the two square brackets i.e. [[ HERE ]]
466+
// so we add the subexpressions first, _then_ add the "children"
465467

466-
// This needs to be done *after* childrenBuf has been populated.
467-
children = childrenBuf;
468+
for (const auto *Attr : AS->getAttrs()) {
469+
if (const auto *AssumeAttr = dyn_cast<CXXAssumeAttr>(Attr)) {
470+
Expr *AssumeExpr = AssumeAttr->getAssumption();
471+
if (!AssumeExpr->HasSideEffects(Ctx)) {
472+
childrenBuf.push_back(AssumeExpr);
473+
}
474+
}
475+
// Visit the actual children AST nodes.
476+
// For CXXAssumeAttrs, this is always a NullStmt.
477+
llvm::append_range(childrenBuf, AS->children());
478+
children = childrenBuf;
479+
}
480+
return;
481+
}
482+
default:
483+
// Default case for all other statements.
484+
llvm::append_range(childrenBuf, S->children());
485+
children = childrenBuf;
486+
}
468487
}
469488

470489
namespace {
@@ -2431,7 +2450,7 @@ CFGBlock *CFGBuilder::VisitChildren(Stmt *S) {
24312450

24322451
// Visit the children in their reverse order so that they appear in
24332452
// left-to-right (natural) order in the CFG.
2434-
reverse_children RChildren(S);
2453+
reverse_children RChildren(S, *Context);
24352454
for (Stmt *Child : RChildren) {
24362455
if (Child)
24372456
if (CFGBlock *R = Visit(Child))
@@ -2447,7 +2466,7 @@ CFGBlock *CFGBuilder::VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc) {
24472466
}
24482467
CFGBlock *B = Block;
24492468

2450-
reverse_children RChildren(ILE);
2469+
reverse_children RChildren(ILE, *Context);
24512470
for (Stmt *Child : RChildren) {
24522471
if (!Child)
24532472
continue;
@@ -2482,6 +2501,14 @@ static bool isFallthroughStatement(const AttributedStmt *A) {
24822501
return isFallthrough;
24832502
}
24842503

2504+
static bool isCXXAssumeAttr(const AttributedStmt *A) {
2505+
bool hasAssumeAttr = hasSpecificAttr<CXXAssumeAttr>(A->getAttrs());
2506+
2507+
assert((!hasAssumeAttr || isa<NullStmt>(A->getSubStmt())) &&
2508+
"expected [[assume]] not to have children");
2509+
return hasAssumeAttr;
2510+
}
2511+
24852512
CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
24862513
AddStmtChoice asc) {
24872514
// AttributedStmts for [[likely]] can have arbitrary statements as children,
@@ -2497,6 +2524,11 @@ CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
24972524
appendStmt(Block, A);
24982525
}
24992526

2527+
if (isCXXAssumeAttr(A) && asc.alwaysAdd(*this, A)) {
2528+
autoCreateBlock();
2529+
appendStmt(Block, A);
2530+
}
2531+
25002532
return VisitChildren(A);
25012533
}
25022534

clang/lib/StaticAnalyzer/Core/ExprEngine.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -1941,7 +1941,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
19411941
// to be explicitly evaluated.
19421942
case Stmt::PredefinedExprClass:
19431943
case Stmt::AddrLabelExprClass:
1944-
case Stmt::AttributedStmtClass:
19451944
case Stmt::IntegerLiteralClass:
19461945
case Stmt::FixedPointLiteralClass:
19471946
case Stmt::CharacterLiteralClass:
@@ -1972,6 +1971,13 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
19721971
break;
19731972
}
19741973

1974+
case Stmt::AttributedStmtClass: {
1975+
Bldr.takeNodes(Pred);
1976+
VisitAttributedStmt(cast<AttributedStmt>(S), Pred, Dst);
1977+
Bldr.addNodes(Dst);
1978+
break;
1979+
}
1980+
19751981
case Stmt::CXXDefaultArgExprClass:
19761982
case Stmt::CXXDefaultInitExprClass: {
19771983
Bldr.takeNodes(Pred);

clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -794,17 +794,18 @@ void ExprEngine::VisitGuardedExpr(const Expr *Ex,
794794

795795
// Find the predecessor block.
796796
ProgramStateRef SrcState = state;
797+
797798
for (const ExplodedNode *N = Pred ; N ; N = *N->pred_begin()) {
798-
ProgramPoint PP = N->getLocation();
799-
if (PP.getAs<PreStmtPurgeDeadSymbols>() || PP.getAs<BlockEntrance>()) {
799+
auto Edge = N->getLocationAs<BlockEdge>();
800+
if (!Edge.has_value()) {
800801
// If the state N has multiple predecessors P, it means that successors
801802
// of P are all equivalent.
802803
// In turn, that means that all nodes at P are equivalent in terms
803804
// of observable behavior at N, and we can follow any of them.
804805
// FIXME: a more robust solution which does not walk up the tree.
805806
continue;
806807
}
807-
SrcBlock = PP.castAs<BlockEdge>().getSrc();
808+
SrcBlock = Edge->getSrc();
808809
SrcState = N->getState();
809810
break;
810811
}

clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "clang/AST/AttrIterator.h"
1314
#include "clang/AST/DeclCXX.h"
1415
#include "clang/AST/ParentMap.h"
1516
#include "clang/AST/StmtCXX.h"
@@ -1200,3 +1201,20 @@ void ExprEngine::VisitLambdaExpr(const LambdaExpr *LE, ExplodedNode *Pred,
12001201
// FIXME: Move all post/pre visits to ::Visit().
12011202
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, LE, *this);
12021203
}
1204+
1205+
void ExprEngine::VisitAttributedStmt(const AttributedStmt *A,
1206+
ExplodedNode *Pred, ExplodedNodeSet &Dst) {
1207+
ExplodedNodeSet CheckerPreStmt;
1208+
getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, A, *this);
1209+
1210+
ExplodedNodeSet EvalSet;
1211+
StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
1212+
1213+
for (const auto *Attr : getSpecificAttrs<CXXAssumeAttr>(A->getAttrs())) {
1214+
for (ExplodedNode *N : CheckerPreStmt) {
1215+
Visit(Attr->getAssumption(), N, EvalSet);
1216+
}
1217+
}
1218+
1219+
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, A, *this);
1220+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// RUN: %clang_analyze_cc1 -std=c++23 -triple x86_64-pc-linux-gnu \
2+
// RUN: -analyzer-checker=core,debug.ExprInspection -verify %s
3+
4+
template <typename T> void clang_analyzer_dump(T);
5+
template <typename T> void clang_analyzer_value(T);
6+
7+
int ternary_in_builtin_assume(int a, int b) {
8+
__builtin_assume(a > 10 ? b == 4 : b == 10);
9+
10+
clang_analyzer_value(a);
11+
// expected-warning@-1 {{[-2147483648, 10]}}
12+
// expected-warning@-2 {{[11, 2147483647]}}
13+
14+
clang_analyzer_dump(b); // expected-warning{{4}} expected-warning{{10}}
15+
16+
if (a > 20) {
17+
clang_analyzer_dump(b + 100); // expected-warning {{104}}
18+
return 2;
19+
}
20+
if (a > 10) {
21+
clang_analyzer_dump(b + 200); // expected-warning {{204}}
22+
return 1;
23+
}
24+
clang_analyzer_dump(b + 300); // expected-warning {{310}}
25+
return 0;
26+
}
27+
28+
// From: https://github.com/llvm/llvm-project/pull/116462#issuecomment-2517853226
29+
int ternary_in_assume(int a, int b) {
30+
// FIXME notes
31+
// Currently, if this test is run without the core.builtin.Builtin checker, the above function with the __builtin_assume behaves identically to the following test
32+
// i.e. calls to `clang_analyzer_dump` result in "extraneous" prints of the SVal(s) `reg<int b> ...`
33+
// as opposed to 4 or 10
34+
// which likely implies the Program State(s) did not get narrowed.
35+
// A new checker is likely needed to be implemented to properly handle the expressions within `[[assume]]` to eliminate the states where `b` is not narrowed.
36+
37+
[[assume(a > 10 ? b == 4 : b == 10)]];
38+
clang_analyzer_value(a);
39+
// expected-warning@-1 {{[-2147483648, 10]}}
40+
// expected-warning@-2 {{[11, 2147483647]}}
41+
42+
clang_analyzer_dump(b); // expected-warning {{4}} expected-warning {{10}}
43+
// expected-warning-re@-1 {{reg_${{[0-9]+}}<int b>}} FIXME: We shouldn't have this dump.
44+
45+
if (a > 20) {
46+
clang_analyzer_dump(b + 100); // expected-warning {{104}}
47+
// expected-warning-re@-1 {{(reg_${{[0-9]+}}<int b>) + 100}} FIXME: We shouldn't have this dump.
48+
return 2;
49+
}
50+
if (a > 10) {
51+
clang_analyzer_dump(b + 200); // expected-warning {{204}}
52+
// expected-warning-re@-1 {{(reg_${{[0-9]+}}<int b>) + 200}} FIXME: We shouldn't have this dump.
53+
return 1;
54+
}
55+
clang_analyzer_dump(b + 300); // expected-warning {{310}}
56+
// expected-warning-re@-1 {{(reg_${{[0-9]+}}<int b>) + 300}} FIXME: We shouldn't have this dump.
57+
return 0;
58+
}

clang/test/Analysis/out-of-bounds-new.cpp

+63-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -analyzer-checker=unix,core,alpha.security.ArrayBoundV2 -verify %s
1+
// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -verify %s \
2+
// RUN: -analyzer-checker=unix,core,alpha.security.ArrayBoundV2,debug.ExprInspection
3+
4+
template <typename T> void clang_analyzer_dump(T);
5+
template <typename T> void clang_analyzer_value(T);
6+
void clang_analyzer_eval(bool);
7+
template <typename T>
8+
void clang_analyzer_explain(T);
29

310
// Tests doing an out-of-bounds access after the end of an array using:
411
// - constant integer index
@@ -180,3 +187,58 @@ int test_reference_that_might_be_after_the_end(int idx) {
180187
return ref;
181188
}
182189

190+
// From: https://github.com/llvm/llvm-project/issues/100762
191+
extern int arrOf10[10];
192+
void using_builtin(int x) {
193+
__builtin_assume(x > 101); // CallExpr
194+
arrOf10[x] = 404; // expected-warning {{Out of bound access to memory}}
195+
}
196+
197+
void using_assume_attr(int ax) {
198+
[[assume(ax > 100)]]; // NullStmt with an "assume" attribute.
199+
arrOf10[ax] = 405; // expected-warning {{Out of bound access to memory}}
200+
}
201+
202+
void using_many_assume_attr(int yx) {
203+
[[assume(yx > 104), assume(yx > 200), assume(yx < 300)]]; // NullStmt with an attribute
204+
arrOf10[yx] = 406; // expected-warning{{Out of bound access to memory}}
205+
}
206+
207+
208+
int using_builtin_assume_has_no_sideeffects(int y) {
209+
// We should not apply sideeffects of the argument of [[assume(...)]].
210+
// "y" should not get incremented;
211+
__builtin_assume(++y == 43); // expected-warning {{assumption is ignored because it contains (potential) side-effects}}
212+
clang_analyzer_eval(y == 42); // expected-warning {{FALSE}}
213+
return y;
214+
}
215+
216+
217+
218+
int using_assume_attr_has_no_sideeffects(int y) {
219+
220+
// We should not apply sideeffects of the argument of [[assume(...)]].
221+
// "y" should not get incremented;
222+
[[assume(++y == 43)]]; // expected-warning {{assumption is ignored because it contains (potential) side-effects}}
223+
224+
clang_analyzer_eval(y == 42); // expected-warning {{TRUE}} expected-warning {{FALSE}} FIXME: This should be only TRUE.
225+
226+
clang_analyzer_eval(y == 43); // expected-warning {{FALSE}} expected-warning {{TRUE}} FIXME: This should be only FALSE.
227+
228+
return y;
229+
}
230+
231+
232+
int using_builtinassume_has_no_sideeffects(int u) {
233+
// We should not apply sideeffects of the argument of __builtin_assume(...)
234+
// "u" should not get incremented;
235+
__builtin_assume(++u == 43); // expected-warning {{assumption is ignored because it contains (potential) side-effects}}
236+
237+
// FIXME: evaluate this to true
238+
clang_analyzer_eval(u == 42); // expected-warning {{FALSE}} current behavior
239+
240+
// FIXME: evaluate this to false
241+
clang_analyzer_eval(u == 43); // expected-warning {{TRUE}} current behavior
242+
243+
return u;
244+
}

0 commit comments

Comments
 (0)