llvm · NagyDonat · Sep 23, 2024 · Sep 24, 2024 · Sep 30, 2024 · Sep 30, 2024
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -121,6 +121,34 @@ struct EvalCallOptions {
   EvalCallOptions() {}
 };
 
+/// Simple control flow statements like `if` can only produce a single two-way
+/// state split, so when the analyzer cannot determine the value of the
+/// condition, it can assume either of the two options, because the fact that
+/// they are in the source code implies that the programmer thought that they
+/// are possible (at least under some conditions).
+/// (Note that this heuristic is not entirely correct when there are _several_
+/// `if` statements with unmarked logical connections between them, but it's
+/// still good enough and the analyzer heavily relies on it.)
+/// In contrast with this, a single loop statement can produce multiple state
+/// splits, and we cannot always single out safe assumptions where we can say
+/// that "the programmer included this loop in the source code, so they clearly
+/// thought that this execution path is possible".
+/// However, the analyzer wants to explore the code in and after the loop, so
+/// it makes assumptions about the loop condition (to get a concrete execution
+/// path) even when they are not justified.
+/// This function is called by the engine to mark the `State` when it makes an
+/// assumption which is "weak". Checkers may use this heuristic mark to
+/// discard the result and reduce the number of false positives.
+/// TODO: Instead of just marking these branches for checker-specific handling,
+/// we could discard them completely. I suspect that this could eliminate some
+/// false positives without suppressing too many true positives, but I didn't
+/// have time to measure its effects.
+ProgramStateRef recordWeakLoopAssumption(ProgramStateRef State);
+
+/// Returns true if `recordWeakLoopAssumption()` was called on the execution
+/// path which produced `State`.
+bool seenWeakLoopAssumption(ProgramStateRef State);
+
 class ExprEngine {
   void anchor();
 
@@ -322,13 +350,14 @@ class ExprEngine {
                                ExplodedNode *Pred);
 
   /// ProcessBranch - Called by CoreEngine.  Used to generate successor
-  ///  nodes by processing the 'effects' of a branch condition.
-  void processBranch(const Stmt *Condition,
-                     NodeBuilderContext& BuilderCtx,
-                     ExplodedNode *Pred,
-                     ExplodedNodeSet &Dst,
-                     const CFGBlock *DstT,
-                     const CFGBlock *DstF);
+  /// nodes by processing the 'effects' of a branch condition.
+  /// If the branch condition is a loop condition, IterationsFinishedInLoop is
+  /// the number of already finished iterations (0, 1, 2, ...); otherwise it's
+  /// std::nullopt.
+  void processBranch(const Stmt *Condition, NodeBuilderContext &BuilderCtx,
+                     ExplodedNode *Pred, ExplodedNodeSet &Dst,
+                     const CFGBlock *DstT, const CFGBlock *DstF,
+                     std::optional<unsigned> IterationsFinishedInLoop);
 
   /// Called by CoreEngine.
   /// Used to generate successor nodes for temporary destructors depending
@@ -583,11 +612,11 @@ class ExprEngine {
                                 ExplodedNode *Pred,
                                 ExplodedNodeSet &Dst);
 
-  /// evalEagerlyAssumeBinOpBifurcation - Given the nodes in 'Src', eagerly assume symbolic
-  ///  expressions of the form 'x != 0' and generate new nodes (stored in Dst)
-  ///  with those assumptions.
-  void evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src,
-                         const Expr *Ex);
+  /// evalEagerlyAssumeOpBifurcation - Given the nodes in 'Src', eagerly assume
+  /// symbolic expressions of the form 'x != 0' or '!x' and generate new nodes
+  /// (stored in Dst) with those assumptions.
+  void evalEagerlyAssumeOpBifurcation(ExplodedNodeSet &Dst,
+                                      ExplodedNodeSet &Src, const Expr *Ex);
 
   static std::pair<const ProgramPointTag *, const ProgramPointTag *>
     geteagerlyAssumeBinOpBifurcationTags();

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -697,6 +697,11 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
                                     ProgramStateRef ErrorState, Messages Msgs,
                                     NonLoc Offset, std::optional<NonLoc> Extent,
                                     bool IsTaintBug /*=false*/) const {
+  // Suppress results found through execution paths where in some loop the
+  // analyzer arbitrarily assumed either that the loop is skipped (0 iterations)
+  // or that 3 or more iterations are executed.
+  if (seenWeakLoopAssumption(ErrorState))
+    return;
 
   ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
   if (!ErrorNode)

diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -441,10 +441,32 @@ void CoreEngine::HandleCallEnter(const CallEnter &CE, ExplodedNode *Pred) {
 void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term,
                                 const CFGBlock * B, ExplodedNode *Pred) {
   assert(B->succ_size() == 2);
+
+  const LocationContext *LC = Pred->getLocationContext();
+  BlockCounter Counter = WList->getBlockCounter();
+  unsigned BlockCount =
+      Counter.getNumVisited(LC->getStackFrame(), B->getBlockID());
+  std::optional<unsigned> IterationsFinishedInLoop = std::nullopt;
+  if (isa<ForStmt, WhileStmt, CXXForRangeStmt>(Term)) {
+    // FIXME: This code approximates the number of finished iterations based on
+    // the block count, i.e. the number of evaluations of the terminator block
+    // on the current execution path (which includes the current evaluation, so
+    // is always >= 1). This is probably acceptable for the checker-specific
+    // false positive suppression that currently uses this value, but it would
+    // be better to calculate an accurate count of iterations.
+    assert(BlockCount >= 1);
+    IterationsFinishedInLoop = BlockCount - 1;
+  } else if (isa<DoStmt>(Term)) {
+    // FIXME: The fixme note in the previous branch also applies here.
+    // In a do-while loop one iteration happens before the first evaluation of
+    // the loop condition, so we don't subtract one from the block count.
+    IterationsFinishedInLoop = BlockCount;
+  }
+
   NodeBuilderContext Ctx(*this, B, Pred);
   ExplodedNodeSet Dst;
   ExprEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()),
-                       *(B->succ_begin() + 1));
+                        *(B->succ_begin() + 1), IterationsFinishedInLoop);
   // Enqueue the new frontier onto the worklist.
   enqueue(Dst);
 }

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -212,6 +212,24 @@ typedef llvm::ImmutableMap<const LocationContext *, unsigned>
 REGISTER_TRAIT_WITH_PROGRAMSTATE(PendingArrayDestruction,
                                  PendingArrayDestructionMap)
 
+// This trait is used to heuristically filter out results produced from
+// execution paths that took "weak" assumptions within a loop.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(SeenWeakLoopAssumption, bool)
+
+ProgramStateRef clang::ento::recordWeakLoopAssumption(ProgramStateRef State) {
+  return State->set<SeenWeakLoopAssumption>(true);
+}
+
+bool clang::ento::seenWeakLoopAssumption(ProgramStateRef State) {
+  return State->get<SeenWeakLoopAssumption>();
+}
+
+// This trait points to the last expression (logical operator) where an eager
+// assumption introduced a state split (i.e. both cases were feasible). This is
+// used by the WeakLoopAssumption heuristic to find situations where an eager
+// assumption introduces a state split in the evaluation of a loop condition.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeAssumptionAt, const Expr *)
+
 //===----------------------------------------------------------------------===//
 // Engine construction and deletion.
 //===----------------------------------------------------------------------===//
@@ -2128,7 +2146,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
           (B->isRelationalOp() || B->isEqualityOp())) {
         ExplodedNodeSet Tmp;
         VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Tmp);
-        evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, cast<Expr>(S));
+        evalEagerlyAssumeOpBifurcation(Dst, Tmp, cast<Expr>(S));
       }
       else
         VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst);
@@ -2401,7 +2419,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
       if (AMgr.options.ShouldEagerlyAssume && (U->getOpcode() == UO_LNot)) {
         ExplodedNodeSet Tmp;
         VisitUnaryOperator(U, Pred, Tmp);
-        evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, U);
+        evalEagerlyAssumeOpBifurcation(Dst, Tmp, U);
       }
       else
         VisitUnaryOperator(U, Pred, Dst);
@@ -2761,12 +2779,10 @@ assumeCondition(const Stmt *Condition, ExplodedNode *N) {
   return State->assume(V);
 }
 
-void ExprEngine::processBranch(const Stmt *Condition,
-                               NodeBuilderContext& BldCtx,
-                               ExplodedNode *Pred,
-                               ExplodedNodeSet &Dst,
-                               const CFGBlock *DstT,
-                               const CFGBlock *DstF) {
+void ExprEngine::processBranch(
+    const Stmt *Condition, NodeBuilderContext &BldCtx, ExplodedNode *Pred,
+    ExplodedNodeSet &Dst, const CFGBlock *DstT, const CFGBlock *DstF,
+    std::optional<unsigned> IterationsFinishedInLoop) {
   assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
          "CXXBindTemporaryExprs are handled by processBindTemporary.");
   const LocationContext *LCtx = Pred->getLocationContext();
@@ -2804,31 +2820,59 @@ void ExprEngine::processBranch(const Stmt *Condition,
     ProgramStateRef PrevState = PredN->getState();
 
     ProgramStateRef StTrue, StFalse;
-    if (const auto KnownCondValueAssumption = assumeCondition(Condition, PredN))
+    StTrue = StFalse = PrevState;
+
+    if (const auto KnownCondValueAssumption =
+            assumeCondition(Condition, PredN)) {
       std::tie(StTrue, StFalse) = *KnownCondValueAssumption;
-    else {
-      assert(!isa<ObjCForCollectionStmt>(Condition));
-      builder.generateNode(PrevState, true, PredN);
-      builder.generateNode(PrevState, false, PredN);
-      continue;
+
+      if (!StTrue)
+        builder.markInfeasible(true);
+
+      if (!StFalse)
+        builder.markInfeasible(false);
     }
-    if (StTrue && StFalse)
+    bool BothFeasible = builder.isFeasible(true) && builder.isFeasible(false);
+
+    if (BothFeasible)
       assert(!isa<ObjCForCollectionStmt>(Condition));
 
+    const Expr *EagerlyAssumeExpr =
+        PrevState->get<LastEagerlyAssumeAssumptionAt>();
+    bool DidEagerlyAssume = EagerlyAssumeExpr == dyn_cast<Expr>(Condition);
+
     // Process the true branch.
     if (builder.isFeasible(true)) {
-      if (StTrue)
-        builder.generateNode(StTrue, true, PredN);
-      else
-        builder.markInfeasible(true);
+      if ((BothFeasible || DidEagerlyAssume) && IterationsFinishedInLoop &&
+          *IterationsFinishedInLoop >= 2) {
+        // When programmers write a loop, they imply that at least two
+        // iterations are possible (otherwise they would just write an `if`),
+        // but the third iteration is not implied: there are situations where
+        // the programmer knows that there won't be a third iteration, but
+        // this is not marked in the source code. (For example, the ffmpeg
+        // project has 2-element arrays which are accessed from loops where
+        // the number of steps is opaque and the analyzer cannot deduce that
+        // there are <= 2 iterations.)
+        // Checkers may use this heuristic mark to discard results found on
+        // branches that contain this "weak" assumption.
+        StTrue = recordWeakLoopAssumption(StTrue);
+      }
+      builder.generateNode(StTrue, true, PredN);
     }
 
     // Process the false branch.
     if (builder.isFeasible(false)) {
-      if (StFalse)
-        builder.generateNode(StFalse, false, PredN);
-      else
-        builder.markInfeasible(false);
+      if ((BothFeasible || DidEagerlyAssume) && IterationsFinishedInLoop &&
+          *IterationsFinishedInLoop == 0) {
+        // There are many situations where the programmers know that there
+        // will be at least one iteration in a loop (e.g. a structure is not
+        // empty) but the analyzer cannot deduce this and reports false
+        // positives after skipping the loop.
+        // Checkers may use this heuristic mark to discard results found on
+        // branches that contain this "weak" assumption.
+        StFalse = recordWeakLoopAssumption(StFalse);
+      }
+      builder.generateNode(StFalse, false, PredN);
     }
   }
   currBldrCtx = nullptr;
@@ -3752,9 +3796,9 @@ ExprEngine::geteagerlyAssumeBinOpBifurcationTags() {
                         &eagerlyAssumeBinOpBifurcationFalse);
 }
 
-void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
-                                                   ExplodedNodeSet &Src,
-                                                   const Expr *Ex) {
+void ExprEngine::evalEagerlyAssumeOpBifurcation(ExplodedNodeSet &Dst,
+                                                ExplodedNodeSet &Src,
+                                                const Expr *Ex) {
   StmtNodeBuilder Bldr(Src, Dst, *currBldrCtx);
 
   for (const auto Pred : Src) {
@@ -3776,6 +3820,11 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
       ProgramStateRef StateTrue, StateFalse;
       std::tie(StateTrue, StateFalse) = state->assume(*SEV);
 
+      if (StateTrue && StateFalse) {
+        StateTrue = StateTrue->set<LastEagerlyAssumeAssumptionAt>(Ex);
+        StateFalse = StateFalse->set<LastEagerlyAssumeAssumptionAt>(Ex);
+      }
+
       // First assume that the condition is true.
       if (StateTrue) {
         SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());

diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp
@@ -349,7 +349,7 @@ int simple_unknown_bound_loop() {
 #ifdef DFS
     clang_analyzer_numTimesReached(); // expected-warning {{16}}
 #else
-    clang_analyzer_numTimesReached(); // expected-warning {{8}}
+    clang_analyzer_numTimesReached(); // expected-warning {{10}}
 #endif
   }
   return 0;
@@ -369,9 +369,9 @@ int nested_inlined_no_unroll1() {
   int k;
   for (int i = 0; i < 9; i++) {
 #ifdef DFS
-    clang_analyzer_numTimesReached(); // expected-warning {{18}}
+    clang_analyzer_numTimesReached(); // expected-warning {{20}}
 #else
-    clang_analyzer_numTimesReached(); // expected-warning {{14}}
+    clang_analyzer_numTimesReached(); // expected-warning {{18}}
 #endif
     k = simple_unknown_bound_loop();  // reevaluation without inlining, splits the state as well
   }