diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 34bb7a809162b..a9b8d0753673b 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -294,6 +294,16 @@ ANALYZER_OPTION( bool, ShouldUnrollLoops, "unroll-loops", "Whether the analysis should try to unroll loops with known bounds.", false) +ANALYZER_OPTION( + bool, ShouldAssumeAtLeastOneIteration, "assume-at-least-one-iteration", + "Whether the analyzer should always assume at least one iteration in " + "loops where the loop condition is opaque (i.e. the analyzer cannot " + "determine if it's true or false). Setting this to true eliminates some " + "false positives (where e.g. a structure is nonempty, but the analyzer " + "does not notice this); but it also eliminates some true positives (e.g. " + "cases where a structure can be empty and this causes buggy behavior).", + false) + ANALYZER_OPTION( bool, ShouldDisplayNotesAsEvents, "notes-as-events", "Whether the bug reporter should transparently treat extra note diagnostic " diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 2b1872f8386aa..18589f39d2146 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2810,13 +2810,24 @@ void ExprEngine::processBranch( if (StTrue && StFalse) assert(!isa(Condition)); + // We want to ensure consistent behavior between `eagerly-assume=false`, + // when the state split is always performed by the `assumeCondition()` + // call within this function and `eagerly-assume=true` (the default), when + // some conditions (comparison operators, unary negation) can trigger a + // state split before this callback. There are some contrived corner cases + // that behave differently with and without `eagerly-assume`, but I don't + // know about an example that could plausibly appear in "real" code. + bool BothFeasible = + (StTrue && StFalse) || + didEagerlyAssumeBifurcateAt(PrevState, dyn_cast(Condition)); + if (StTrue) { - // If we are processing a loop condition where two iterations have - // already been completed and the false branch is also feasible, then - // don't assume a third iteration because it is a redundant execution - // path (unlikely to be different from earlier loop exits) and can cause - // false positives if e.g. the loop iterates over a two-element structure - // with an opaque condition. + // In a loop, if both branches are feasible (i.e. the analyzer doesn't + // understand the loop condition) and two iterations have already been + // completed, then don't assume a third iteration because it is a + // redundant execution path (unlikely to be different from earlier loop + // exits) and can cause false positives if e.g. the loop iterates over a + // two-element structure with an opaque condition. // // The iteration count "2" is hardcoded because it's the natural limit: // * the fact that the programmer wrote a loop (and not just an `if`) @@ -2827,10 +2838,7 @@ void ExprEngine::processBranch( // two iterations". (This pattern is common in FFMPEG and appears in // many other projects as well.) bool CompletedTwoIterations = IterationsCompletedInLoop.value_or(0) >= 2; - bool FalseAlsoFeasible = - StFalse || - didEagerlyAssumeBifurcateAt(PrevState, dyn_cast(Condition)); - bool SkipTrueBranch = CompletedTwoIterations && FalseAlsoFeasible; + bool SkipTrueBranch = BothFeasible && CompletedTwoIterations; // FIXME: This "don't assume third iteration" heuristic partially // conflicts with the widen-loop analysis option (which is off by @@ -2840,8 +2848,25 @@ void ExprEngine::processBranch( Builder.generateNode(StTrue, true, PredN); } - if (StFalse) - Builder.generateNode(StFalse, false, PredN); + if (StFalse) { + // In a loop, if both branches are feasible (i.e. the analyzer doesn't + // understand the loop condition), we are before the first iteration and + // the analyzer option `assume-at-least-one-iteration` is set to `true`, + // then avoid creating the execution path where the loop is skipped. + // + // In some situations this "loop is skipped" execution path is an + // important corner case that may evade the notice of the developer and + // hide significant bugs -- however, there are also many situations where + // it's guaranteed that at least one iteration will happen (e.g. some + // data structure is always nonempty), but the analyzer cannot realize + // this and will produce false positives when it assumes that the loop is + // skipped. + bool BeforeFirstIteration = IterationsCompletedInLoop == std::optional{0}; + bool SkipFalseBranch = BothFeasible && BeforeFirstIteration && + AMgr.options.ShouldAssumeAtLeastOneIteration; + if (!SkipFalseBranch) + Builder.generateNode(StFalse, false, PredN); + } } currBldrCtx = nullptr; } diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index d5eb790b82f23..f6a49680917ac 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -10,6 +10,7 @@ // CHECK-NEXT: alpha.cplusplus.STLAlgorithmModeling:AggressiveStdFindModeling = false // CHECK-NEXT: alpha.osx.cocoa.DirectIvarAssignment:AnnotatedFunctions = false // CHECK-NEXT: apply-fixits = false +// CHECK-NEXT: assume-at-least-one-iteration = false // CHECK-NEXT: assume-controlled-environment = false // CHECK-NEXT: avoid-suppressing-null-argument-paths = false // CHECK-NEXT: c++-allocator-inlining = true diff --git a/clang/test/Analysis/loop-assumptions.c b/clang/test/Analysis/loop-assumptions.c index eb0ffdce722e0..b61ed8815e3f6 100644 --- a/clang/test/Analysis/loop-assumptions.c +++ b/clang/test/Analysis/loop-assumptions.c @@ -1,25 +1,48 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \ -// RUN: -verify=expected,eagerlyassume %s +// RUN: -verify=expected,noassumeone,eagerlyassume,combo %s // RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ +// RUN: -verify=expected,noassumeone,noeagerlyassume,combo %s +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \ +// RUN: -analyzer-config assume-at-least-one-iteration=true \ +// RUN: -verify=expected,eagerlyassume,combo %s +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \ +// RUN: -analyzer-config assume-at-least-one-iteration=true,eagerly-assume=false \ // RUN: -verify=expected,noeagerlyassume %s +// The verify tag "combo" is used for one unique warning which is produced in three +// of the four RUN combinations. + // These tests validate the logic within `ExprEngine::processBranch` which // ensures that in loops with opaque conditions we don't assume execution paths // if the code does not imply that they are possible. +// In particular, if two (or more) iterations are already completed in a loop, +// we don't assume that there can be another iteration. Moreover, if the +// analyzer option `assume-at-least-one-iteration` is enabled, then we don't +// assume that a loop can be skipped completely. void clang_analyzer_numTimesReached(void); -void clang_analyzer_warnIfReached(void); void clang_analyzer_dump(int); -void clearCondition(void) { - // If the analyzer can definitely determine the value of the loop condition, +void clearTrueCondition(void) { + // If the analyzer can definitely determine that the loop condition is true, // then this corrective logic doesn't activate and the engine executes // `-analyzer-max-loop` iterations (by default, 4). - for (int i = 0; i < 10; i++) + int i; + for (i = 0; i < 10; i++) clang_analyzer_numTimesReached(); // expected-warning {{4}} - clang_analyzer_warnIfReached(); // unreachable + clang_analyzer_dump(i); // Unreachable, no reports. +} + +void clearFalseCondition(void) { + // If the analyzer can definitely determine that the loop condition is false, + // then the loop is skipped, even in `assume-at-least-one-iteration` mode. + int i; + for (i = 0; i > 10; i++) + clang_analyzer_numTimesReached(); // Unreachable, no report. + + clang_analyzer_dump(i); // expected-warning {{0}} } void opaqueCondition(int arg) { @@ -28,10 +51,13 @@ void opaqueCondition(int arg) { // that more than two iterations are possible. (It _does_ imply that two // iterations may be possible at least in some cases, because otherwise an // `if` would've been enough.) - for (int i = 0; i < arg; i++) + // Moreover, if `assume-at-least-one-iteration` is enabled, then assume at + // least one iteration. + int i; + for (i = 0; i < arg; i++) clang_analyzer_numTimesReached(); // expected-warning {{2}} - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}} } int check(void); @@ -42,22 +68,26 @@ void opaqueConditionCall(int arg) { // insert an assertion to guide the analyzer and rule out more than two // iterations (so the analyzer needs to proactively avoid those unjustified // branches). - while (check()) + int i = 0; // Helper to distinguish the the branches after the loop. + while (check()) { clang_analyzer_numTimesReached(); // expected-warning {{2}} + i++; + } - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}} } void opaqueConditionDoWhile(int arg) { // Same situation as `opaqueCondition()` but with a `do {} while ()` loop. // This is tested separately because this loop type is a special case in the // iteration count calculation. + // Obviously, this loop guarantees that at least one iteration will happen. int i = 0; do { clang_analyzer_numTimesReached(); // expected-warning {{2}} } while (i++ < arg); - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + clang_analyzer_dump(i); // expected-warning {{1}} expected-warning {{2}} } void dontRememberOldBifurcation(int arg) { @@ -69,7 +99,7 @@ void dontRememberOldBifurcation(int arg) { // by default), because the code remembered that there was a bifurcation on // the first iteration of the loop and didn't realize that this is obsolete. - // NOTE: The variable `i` is introduced to ensure that the iterations of the + // NOTE: The variable `i` is significant to ensure that the iterations of the // loop change the state -- otherwise the analyzer stops iterating because it // returns to the same `ExplodedNode`. int i = 0; @@ -78,10 +108,12 @@ void dontRememberOldBifurcation(int arg) { i++; } - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + clang_analyzer_dump(i); // noassumeone-warning {{0}} } void dontAssumeFourthIterartion(int arg) { + int i; + if (arg == 2) return; @@ -89,10 +121,10 @@ void dontAssumeFourthIterartion(int arg) { // iterations (because it knows that `arg != 2` at that point), so it // performs a third iteration, but it does not assume that a fourth iteration // is also possible. - for (int i = 0; i < arg; i++) + for (i = 0; i < arg; i++) clang_analyzer_numTimesReached(); // expected-warning {{3}} - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{3}} } #define TRUE 1 @@ -108,42 +140,53 @@ void shortCircuitInLoopCondition(int arg) { // false positive on the ffmpeg codebase. Eventually we should properly // recognize the full syntactical loop condition expression as "the loop // condition", but this will be complicated to implement. - for (int i = 0; i < arg && TRUE; i++) { + int i; + for (i = 0; i < arg && TRUE; i++) { clang_analyzer_numTimesReached(); // expected-warning {{4}} } - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + clang_analyzer_dump(i); // expected-warning {{0}} expected-warning {{1}} expected-warning {{2}} expected-warning {{3}} } void shortCircuitInLoopConditionRHS(int arg) { // Unlike `shortCircuitInLoopCondition()`, this case is handled properly // because the analyzer thinks that the right hand side of the `&&` is the // loop condition. - for (int i = 0; TRUE && i < arg; i++) { + int i; + for (i = 0; TRUE && i < arg; i++) { clang_analyzer_numTimesReached(); // expected-warning {{2}} } - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}} } void eagerlyAssumeInSubexpression(int arg) { // The `EagerlyAssume` logic is another complication that can "split the // state" within the loop condition, but before the `processBranch()` call - // which is (in theory) responsible for evaluating the loop condition. - // The current implementation partially compensates this by noticing the + // which would be "naturally" responsible for evaluating the loop condition. + // The current implementation tries to handle this by noticing the // cases where the loop condition is targeted by `EagerlyAssume`, but does // not handle the (fortunately rare) case when `EagerlyAssume` hits a // sub-expression of the loop condition (as in this contrived test case). - // FIXME: I don't know a real-world example for this inconsistency, but it - // would be good to eliminate it eventually. - for (int i = 0; (i >= arg) - 1; i++) { + // FIXME: It would be good to eventually eliminate this inconsistency, but + // I don't know a realistic example that could appear in real-world code, so + // this seems to be a low-priority goal. + int i; + for (i = 0; (i >= arg) - 1; i++) { clang_analyzer_numTimesReached(); // eagerlyassume-warning {{4}} noeagerlyassume-warning {{2}} } - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + // The 'combo' note intentionally appears if `assume-at-least-one-iteration` + // is disabled, but also appears as a bug when `eagerly-assume` and + // `assume-at-least-one-iteration` are both enabled. + clang_analyzer_dump(i); // combo-warning {{0}} expected-warning {{1}} expected-warning {{2}} eagerlyassume-warning {{3}} } void calledTwice(int arg, int isFirstCall) { // This function is called twice (with two different unknown 'arg' values) to // check the iteration count handling in this situation. - for (int i = 0; i < arg; i++) { + int i; + for (i = 0; i < arg; i++) { if (isFirstCall) { clang_analyzer_numTimesReached(); // expected-warning {{2}} } else { @@ -215,5 +258,5 @@ void onlyLoopConditions(int arg) { break; } - clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + clang_analyzer_dump(i); // expected-warning {{1}} expected-warning {{2}} expected-warning {{3}} expected-warning {{4}} }