Skip to content

[analyzer] Allow overriding Unknown memspaces using a ProgramState trait #123003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace ento {
class SValExplainer : public FullSValVisitor<SValExplainer, std::string> {
private:
ASTContext &ACtx;
ProgramStateRef State;

std::string printStmt(const Stmt *S) {
std::string Str;
Expand Down Expand Up @@ -55,7 +56,8 @@ class SValExplainer : public FullSValVisitor<SValExplainer, std::string> {
}

public:
SValExplainer(ASTContext &Ctx) : ACtx(Ctx) {}
SValExplainer(ASTContext &Ctx, ProgramStateRef State)
: ACtx(Ctx), State(State) {}

std::string VisitUnknownVal(UnknownVal V) {
return "unknown value";
Expand Down Expand Up @@ -166,7 +168,7 @@ class SValExplainer : public FullSValVisitor<SValExplainer, std::string> {
.getCanonicalType()->getAs<ObjCObjectPointerType>())
return "object at " + Visit(R->getSymbol());
// Other heap-based symbolic regions are also special.
if (isa<HeapSpaceRegion>(R->getMemorySpace()))
if (R->hasMemorySpace<HeapSpaceRegion>(State))
return "heap segment that starts at " + Visit(R->getSymbol());
return "pointee of " + Visit(R->getSymbol());
}
Expand Down
42 changes: 35 additions & 7 deletions clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
#include "llvm/ADT/DenseMap.h"
Expand Down Expand Up @@ -119,7 +120,40 @@ class MemRegion : public llvm::FoldingSetNode {

virtual MemRegionManager &getMemRegionManager() const = 0;

LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion *getMemorySpace() const;
/// Deprecated. Gets the 'raw' memory space of a memory region's base region.
/// If the MemRegion is originally associated with Unknown memspace, then the
/// State may have a more accurate memspace for this region.
/// Use getMemorySpace(ProgramStateRef) instead.
[[nodiscard]] LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion *
getRawMemorySpace() const;

/// Deprecated. Use getMemorySpace(ProgramStateRef) instead.
template <class MemSpace>
[[nodiscard]] const MemSpace *getRawMemorySpaceAs() const {
return dyn_cast<MemSpace>(getRawMemorySpace());
}

/// Returns the most specific memory space for this memory region in the given
/// ProgramStateRef. We may infer a more accurate memory space for unknown
/// space regions and associate this in the State.
[[nodiscard]] LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion *
getMemorySpace(ProgramStateRef State) const;

template <class MemSpace>
[[nodiscard]] const MemSpace *getMemorySpaceAs(ProgramStateRef State) const {
return dyn_cast<MemSpace>(getMemorySpace(State));
}

template <typename... MemorySpaces>
[[nodiscard]] bool hasMemorySpace(ProgramStateRef State) const {
static_assert(sizeof...(MemorySpaces));
return isa<MemorySpaces...>(getMemorySpace(State));
}

/// Set the dynamically deduced memory space of a MemRegion that currently has
/// UnknownSpaceRegion. \p Space shouldn't be UnknownSpaceRegion.
[[nodiscard]] ProgramStateRef
setMemorySpace(ProgramStateRef State, const MemSpaceRegion *Space) const;

LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion *getBaseRegion() const;

Expand All @@ -140,12 +174,6 @@ class MemRegion : public llvm::FoldingSetNode {
/// It might return null.
const SymbolicRegion *getSymbolicBase() const;

bool hasStackStorage() const;

bool hasStackNonParametersStorage() const;

bool hasStackParametersStorage() const;

/// Compute the offset within the top level memory object.
RegionOffset getAsOffset() const;

Expand Down
41 changes: 23 additions & 18 deletions clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
}

class StateUpdateReporter {
const MemSpaceRegion *Space;
const SubRegion *Reg;
const NonLoc ByteOffsetVal;
const std::optional<QualType> ElementType;
Expand All @@ -88,8 +89,8 @@ class StateUpdateReporter {
public:
StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
CheckerContext &C)
: Reg(R), ByteOffsetVal(ByteOffsVal),
ElementType(determineElementType(E, C)),
: Space(R->getMemorySpace(C.getState())), Reg(R),
ByteOffsetVal(ByteOffsVal), ElementType(determineElementType(E, C)),
ElementSize(determineElementSize(ElementType, C)) {}

void recordNonNegativeAssumption() { AssumedNonNegative = true; }
Expand Down Expand Up @@ -352,7 +353,8 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
return {nullptr, nullptr};
}

static std::string getRegionName(const SubRegion *Region) {
static std::string getRegionName(const MemSpaceRegion *Space,
const SubRegion *Region) {
if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
return RegName;

Expand All @@ -367,8 +369,7 @@ static std::string getRegionName(const SubRegion *Region) {
if (isa<AllocaRegion>(Region))
return "the memory returned by 'alloca'";

if (isa<SymbolicRegion>(Region) &&
isa<HeapSpaceRegion>(Region->getMemorySpace()))
if (isa<SymbolicRegion>(Region) && isa<HeapSpaceRegion>(Space))
return "the heap area";

if (isa<StringRegion>(Region))
Expand All @@ -388,8 +389,9 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
return SV ? getConcreteValue(*SV) : std::nullopt;
}

static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
std::string RegName = getRegionName(Region), OffsetStr = "";
static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
const SubRegion *Region, NonLoc Offset) {
std::string RegName = getRegionName(Space, Region), OffsetStr = "";

if (auto ConcreteOffset = getConcreteValue(Offset))
OffsetStr = formatv(" {0}", ConcreteOffset);
Expand Down Expand Up @@ -418,10 +420,11 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
return true;
}

static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
NonLoc Offset, NonLoc Extent, SVal Location,
static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
const SubRegion *Region, NonLoc Offset,
NonLoc Extent, SVal Location,
bool AlsoMentionUnderflow) {
std::string RegName = getRegionName(Region);
std::string RegName = getRegionName(Space, Region);
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
assert(EReg && "this checker only handles element access");
QualType ElemType = EReg->getElementType();
Expand Down Expand Up @@ -468,9 +471,10 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
std::string(Buf)};
}

static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName,
static Messages getTaintMsgs(const MemSpaceRegion *Space,
const SubRegion *Region, const char *OffsetName,
bool AlsoMentionUnderflow) {
std::string RegName = getRegionName(Region);
std::string RegName = getRegionName(Space, Region);
return {formatv("Potential out of bound access to {0} with tainted {1}",
RegName, OffsetName),
formatv("Access of {0} with a tainted {1} that may be {2}too large",
Expand Down Expand Up @@ -539,7 +543,7 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
<< "' elements in ";
else
Out << "the extent of ";
Out << getRegionName(Reg);
Out << getRegionName(Space, Reg);
}
return std::string(Out.str());
}
Expand Down Expand Up @@ -589,7 +593,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
StateUpdateReporter SUR(Reg, ByteOffset, E, C);

// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace();
const MemSpaceRegion *Space = Reg->getMemorySpace(State);
if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
// A symbolic region in unknown space represents an unknown pointer that
// may point into the middle of an array, so we don't look for underflows.
Expand Down Expand Up @@ -632,7 +636,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
} else {
if (!WithinLowerBound) {
// ...and it cannot be valid (>= 0), so report an error.
Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset);
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
return;
}
Expand Down Expand Up @@ -675,8 +679,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
}

Messages Msgs =
getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,
Location, AlsoMentionUnderflow);
getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset,
*KnownSize, Location, AlsoMentionUnderflow);
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
return;
}
Expand All @@ -692,7 +696,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
OffsetName = "index";

Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);
Messages Msgs =
getTaintMsgs(Space, Reg, OffsetName, AlsoMentionUnderflow);
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
/*IsTaintBug=*/true);
return;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/StaticAnalyzer/Checkers/ErrnoChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ ProgramStateRef ErrnoChecker::checkRegionChanges(

// Always reset errno state when the system memory space is invalidated.
// The ErrnoRegion is not always found in the list in this case.
if (llvm::is_contained(Regions, ErrnoRegion->getMemorySpace()))
if (llvm::is_contained(Regions, ErrnoRegion->getMemorySpace(State)))
return clearErrnoState(State);

return State;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ void ExprInspectionChecker::analyzerExplain(const CallExpr *CE,
return;

SVal V = C.getSVal(Arg);
SValExplainer Ex(C.getASTContext());
SValExplainer Ex(C.getASTContext(), C.getState());
reportBug(Ex.Visit(V), C);
}

Expand Down
4 changes: 2 additions & 2 deletions clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ static const ParmVarDecl *getOriginParam(SVal V, CheckerContext &C,
// routine arguments in the heap.
while (const MemRegion *MR = Sym->getOriginRegion()) {
const auto *VR = dyn_cast<VarRegion>(MR);
if (VR && VR->hasStackParametersStorage() &&
VR->getStackFrame()->inTopFrame())
if (VR && VR->hasMemorySpace<StackArgumentsSpaceRegion>(C.getState()) &&
VR->getStackFrame()->inTopFrame())
return cast<ParmVarDecl>(VR->getDecl());

const SymbolicRegion *SR = MR->getSymbolicBase();
Expand Down
11 changes: 5 additions & 6 deletions clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,8 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE,
return;

// Global variables are fine.
const MemRegion *RB = R->getBaseRegion();
const MemSpaceRegion *RS = RB->getMemorySpace();
if (isa<GlobalsSpaceRegion>(RS))
const MemSpaceRegion *Space = R->getMemorySpace(C.getState());
if (isa<GlobalsSpaceRegion>(Space))
return;

// Handle _dispatch_once. In some versions of the OS X SDK we have the case
Expand All @@ -95,7 +94,7 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE,
llvm::raw_svector_ostream os(S);
bool SuggestStatic = false;
os << "Call to '" << FName << "' uses";
if (const VarRegion *VR = dyn_cast<VarRegion>(RB)) {
if (const VarRegion *VR = dyn_cast<VarRegion>(R->getBaseRegion())) {
const VarDecl *VD = VR->getDecl();
// FIXME: These should have correct memory space and thus should be filtered
// out earlier. This branch only fires when we're looking from a block,
Expand All @@ -117,9 +116,9 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE,
if (IVR != R)
os << " memory within";
os << " the instance variable '" << IVR->getDecl()->getName() << '\'';
} else if (isa<HeapSpaceRegion>(RS)) {
} else if (isa<HeapSpaceRegion>(Space)) {
os << " heap-allocated memory";
} else if (isa<UnknownSpaceRegion>(RS)) {
} else if (isa<UnknownSpaceRegion>(Space)) {
// Presence of an IVar superregion has priority over this branch, because
// ObjC objects are on the heap even if the core doesn't realize this.
// Presence of a block variable base region has priority over this branch,
Expand Down
15 changes: 7 additions & 8 deletions clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,8 @@ class MallocChecker
bool IsALeakCheck = false) const;
///@}
static bool SummarizeValue(raw_ostream &os, SVal V);
static bool SummarizeRegion(raw_ostream &os, const MemRegion *MR);
static bool SummarizeRegion(ProgramStateRef State, raw_ostream &os,
const MemRegion *MR);

void HandleNonHeapDealloc(CheckerContext &C, SVal ArgVal, SourceRange Range,
const Expr *DeallocExpr,
Expand Down Expand Up @@ -2202,11 +2203,9 @@ MallocChecker::FreeMemAux(CheckerContext &C, const Expr *ArgExpr,
return nullptr;
}

const MemSpaceRegion *MS = R->getMemorySpace();

// Parameters, locals, statics, globals, and memory returned by
// __builtin_alloca() shouldn't be freed.
if (!isa<UnknownSpaceRegion, HeapSpaceRegion>(MS)) {
if (!R->hasMemorySpace<UnknownSpaceRegion, HeapSpaceRegion>(State)) {
// Regions returned by malloc() are represented by SymbolicRegion objects
// within HeapSpaceRegion. Of course, free() can work on memory allocated
// outside the current function, so UnknownSpaceRegion is also a
Expand Down Expand Up @@ -2384,7 +2383,7 @@ bool MallocChecker::SummarizeValue(raw_ostream &os, SVal V) {
return true;
}

bool MallocChecker::SummarizeRegion(raw_ostream &os,
bool MallocChecker::SummarizeRegion(ProgramStateRef State, raw_ostream &os,
const MemRegion *MR) {
switch (MR->getKind()) {
case MemRegion::FunctionCodeRegionKind: {
Expand All @@ -2403,7 +2402,7 @@ bool MallocChecker::SummarizeRegion(raw_ostream &os,
os << "a block";
return true;
default: {
const MemSpaceRegion *MS = MR->getMemorySpace();
const MemSpaceRegion *MS = MR->getMemorySpace(State);

if (isa<StackLocalsSpaceRegion>(MS)) {
const VarRegion *VR = dyn_cast<VarRegion>(MR);
Expand Down Expand Up @@ -2489,8 +2488,8 @@ void MallocChecker::HandleNonHeapDealloc(CheckerContext &C, SVal ArgVal,
os << "deallocator";

os << " is ";
bool Summarized = MR ? SummarizeRegion(os, MR)
: SummarizeValue(os, ArgVal);
bool Summarized =
MR ? SummarizeRegion(C.getState(), os, MR) : SummarizeValue(os, ArgVal);
if (Summarized)
os << ", which is not memory allocated by ";
else
Expand Down
Loading
Loading