Skip to content

Commit cbd3801

Browse files
Flandinisteakhal
andauthored
[analyzer] Allow overriding Unknown memspaces using a ProgramState trait (#123003)
In general, if we see an allocation, we associate the immutable memory space with the constructed memory region. This works fine if we see the allocation. However, with symbolic regions it's not great because there we don't know anything about their memory spaces, thus put them into the Unknown space. The unfortunate consequence is that once we learn about some aliasing with this Symbolic Region, we can't change the memory space to the deduced one. In this patch, we open up the memory spaces as a trait, basically allowing associating a better memory space with a memregion that was created with the Unknown memory space. As a side effect, this means that now queriing the memory space of a region depends on the State, but many places in the analyzer, such as the Store, doesn't have (and cannot have) access to the State by design. This means that some uses must solely rely on the memspaces of the region, but any other users should use the getter taking a State. Co-authored-by: Balazs Benics <[email protected]>
1 parent c0d1403 commit cbd3801

20 files changed

+201
-132
lines changed

clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ namespace ento {
2727
class SValExplainer : public FullSValVisitor<SValExplainer, std::string> {
2828
private:
2929
ASTContext &ACtx;
30+
ProgramStateRef State;
3031

3132
std::string printStmt(const Stmt *S) {
3233
std::string Str;
@@ -55,7 +56,8 @@ class SValExplainer : public FullSValVisitor<SValExplainer, std::string> {
5556
}
5657

5758
public:
58-
SValExplainer(ASTContext &Ctx) : ACtx(Ctx) {}
59+
SValExplainer(ASTContext &Ctx, ProgramStateRef State)
60+
: ACtx(Ctx), State(State) {}
5961

6062
std::string VisitUnknownVal(UnknownVal V) {
6163
return "unknown value";
@@ -166,7 +168,7 @@ class SValExplainer : public FullSValVisitor<SValExplainer, std::string> {
166168
.getCanonicalType()->getAs<ObjCObjectPointerType>())
167169
return "object at " + Visit(R->getSymbol());
168170
// Other heap-based symbolic regions are also special.
169-
if (isa<HeapSpaceRegion>(R->getMemorySpace()))
171+
if (R->hasMemorySpace<HeapSpaceRegion>(State))
170172
return "heap segment that starts at " + Visit(R->getSymbol());
171173
return "pointee of " + Visit(R->getSymbol());
172174
}

clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "clang/Analysis/AnalysisDeclContext.h"
2727
#include "clang/Basic/LLVM.h"
2828
#include "clang/Basic/SourceLocation.h"
29+
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
2930
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
3031
#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
3132
#include "llvm/ADT/DenseMap.h"
@@ -119,7 +120,40 @@ class MemRegion : public llvm::FoldingSetNode {
119120

120121
virtual MemRegionManager &getMemRegionManager() const = 0;
121122

122-
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion *getMemorySpace() const;
123+
/// Deprecated. Gets the 'raw' memory space of a memory region's base region.
124+
/// If the MemRegion is originally associated with Unknown memspace, then the
125+
/// State may have a more accurate memspace for this region.
126+
/// Use getMemorySpace(ProgramStateRef) instead.
127+
[[nodiscard]] LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion *
128+
getRawMemorySpace() const;
129+
130+
/// Deprecated. Use getMemorySpace(ProgramStateRef) instead.
131+
template <class MemSpace>
132+
[[nodiscard]] const MemSpace *getRawMemorySpaceAs() const {
133+
return dyn_cast<MemSpace>(getRawMemorySpace());
134+
}
135+
136+
/// Returns the most specific memory space for this memory region in the given
137+
/// ProgramStateRef. We may infer a more accurate memory space for unknown
138+
/// space regions and associate this in the State.
139+
[[nodiscard]] LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion *
140+
getMemorySpace(ProgramStateRef State) const;
141+
142+
template <class MemSpace>
143+
[[nodiscard]] const MemSpace *getMemorySpaceAs(ProgramStateRef State) const {
144+
return dyn_cast<MemSpace>(getMemorySpace(State));
145+
}
146+
147+
template <typename... MemorySpaces>
148+
[[nodiscard]] bool hasMemorySpace(ProgramStateRef State) const {
149+
static_assert(sizeof...(MemorySpaces));
150+
return isa<MemorySpaces...>(getMemorySpace(State));
151+
}
152+
153+
/// Set the dynamically deduced memory space of a MemRegion that currently has
154+
/// UnknownSpaceRegion. \p Space shouldn't be UnknownSpaceRegion.
155+
[[nodiscard]] ProgramStateRef
156+
setMemorySpace(ProgramStateRef State, const MemSpaceRegion *Space) const;
123157

124158
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion *getBaseRegion() const;
125159

@@ -140,12 +174,6 @@ class MemRegion : public llvm::FoldingSetNode {
140174
/// It might return null.
141175
const SymbolicRegion *getSymbolicBase() const;
142176

143-
bool hasStackStorage() const;
144-
145-
bool hasStackNonParametersStorage() const;
146-
147-
bool hasStackParametersStorage() const;
148-
149177
/// Compute the offset within the top level memory object.
150178
RegionOffset getAsOffset() const;
151179

clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
7878
}
7979

8080
class StateUpdateReporter {
81+
const MemSpaceRegion *Space;
8182
const SubRegion *Reg;
8283
const NonLoc ByteOffsetVal;
8384
const std::optional<QualType> ElementType;
@@ -88,8 +89,8 @@ class StateUpdateReporter {
8889
public:
8990
StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
9091
CheckerContext &C)
91-
: Reg(R), ByteOffsetVal(ByteOffsVal),
92-
ElementType(determineElementType(E, C)),
92+
: Space(R->getMemorySpace(C.getState())), Reg(R),
93+
ByteOffsetVal(ByteOffsVal), ElementType(determineElementType(E, C)),
9394
ElementSize(determineElementSize(ElementType, C)) {}
9495

9596
void recordNonNegativeAssumption() { AssumedNonNegative = true; }
@@ -352,7 +353,8 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
352353
return {nullptr, nullptr};
353354
}
354355

355-
static std::string getRegionName(const SubRegion *Region) {
356+
static std::string getRegionName(const MemSpaceRegion *Space,
357+
const SubRegion *Region) {
356358
if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
357359
return RegName;
358360

@@ -367,8 +369,7 @@ static std::string getRegionName(const SubRegion *Region) {
367369
if (isa<AllocaRegion>(Region))
368370
return "the memory returned by 'alloca'";
369371

370-
if (isa<SymbolicRegion>(Region) &&
371-
isa<HeapSpaceRegion>(Region->getMemorySpace()))
372+
if (isa<SymbolicRegion>(Region) && isa<HeapSpaceRegion>(Space))
372373
return "the heap area";
373374

374375
if (isa<StringRegion>(Region))
@@ -388,8 +389,9 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
388389
return SV ? getConcreteValue(*SV) : std::nullopt;
389390
}
390391

391-
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
392-
std::string RegName = getRegionName(Region), OffsetStr = "";
392+
static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
393+
const SubRegion *Region, NonLoc Offset) {
394+
std::string RegName = getRegionName(Space, Region), OffsetStr = "";
393395

394396
if (auto ConcreteOffset = getConcreteValue(Offset))
395397
OffsetStr = formatv(" {0}", ConcreteOffset);
@@ -418,10 +420,11 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
418420
return true;
419421
}
420422

421-
static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
422-
NonLoc Offset, NonLoc Extent, SVal Location,
423+
static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
424+
const SubRegion *Region, NonLoc Offset,
425+
NonLoc Extent, SVal Location,
423426
bool AlsoMentionUnderflow) {
424-
std::string RegName = getRegionName(Region);
427+
std::string RegName = getRegionName(Space, Region);
425428
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
426429
assert(EReg && "this checker only handles element access");
427430
QualType ElemType = EReg->getElementType();
@@ -468,9 +471,10 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
468471
std::string(Buf)};
469472
}
470473

471-
static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName,
474+
static Messages getTaintMsgs(const MemSpaceRegion *Space,
475+
const SubRegion *Region, const char *OffsetName,
472476
bool AlsoMentionUnderflow) {
473-
std::string RegName = getRegionName(Region);
477+
std::string RegName = getRegionName(Space, Region);
474478
return {formatv("Potential out of bound access to {0} with tainted {1}",
475479
RegName, OffsetName),
476480
formatv("Access of {0} with a tainted {1} that may be {2}too large",
@@ -539,7 +543,7 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
539543
<< "' elements in ";
540544
else
541545
Out << "the extent of ";
542-
Out << getRegionName(Reg);
546+
Out << getRegionName(Space, Reg);
543547
}
544548
return std::string(Out.str());
545549
}
@@ -589,7 +593,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
589593
StateUpdateReporter SUR(Reg, ByteOffset, E, C);
590594

591595
// CHECK LOWER BOUND
592-
const MemSpaceRegion *Space = Reg->getMemorySpace();
596+
const MemSpaceRegion *Space = Reg->getMemorySpace(State);
593597
if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
594598
// A symbolic region in unknown space represents an unknown pointer that
595599
// may point into the middle of an array, so we don't look for underflows.
@@ -632,7 +636,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
632636
} else {
633637
if (!WithinLowerBound) {
634638
// ...and it cannot be valid (>= 0), so report an error.
635-
Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
639+
Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset);
636640
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
637641
return;
638642
}
@@ -675,8 +679,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
675679
}
676680

677681
Messages Msgs =
678-
getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,
679-
Location, AlsoMentionUnderflow);
682+
getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset,
683+
*KnownSize, Location, AlsoMentionUnderflow);
680684
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
681685
return;
682686
}
@@ -692,7 +696,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
692696
if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
693697
OffsetName = "index";
694698

695-
Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);
699+
Messages Msgs =
700+
getTaintMsgs(Space, Reg, OffsetName, AlsoMentionUnderflow);
696701
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
697702
/*IsTaintBug=*/true);
698703
return;

clang/lib/StaticAnalyzer/Checkers/ErrnoChecker.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ ProgramStateRef ErrnoChecker::checkRegionChanges(
231231

232232
// Always reset errno state when the system memory space is invalidated.
233233
// The ErrnoRegion is not always found in the list in this case.
234-
if (llvm::is_contained(Regions, ErrnoRegion->getMemorySpace()))
234+
if (llvm::is_contained(Regions, ErrnoRegion->getMemorySpace(State)))
235235
return clearErrnoState(State);
236236

237237
return State;

clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ void ExprInspectionChecker::analyzerExplain(const CallExpr *CE,
257257
return;
258258

259259
SVal V = C.getSVal(Arg);
260-
SValExplainer Ex(C.getASTContext());
260+
SValExplainer Ex(C.getASTContext(), C.getState());
261261
reportBug(Ex.Visit(V), C);
262262
}
263263

clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ static const ParmVarDecl *getOriginParam(SVal V, CheckerContext &C,
134134
// routine arguments in the heap.
135135
while (const MemRegion *MR = Sym->getOriginRegion()) {
136136
const auto *VR = dyn_cast<VarRegion>(MR);
137-
if (VR && VR->hasStackParametersStorage() &&
138-
VR->getStackFrame()->inTopFrame())
137+
if (VR && VR->hasMemorySpace<StackArgumentsSpaceRegion>(C.getState()) &&
138+
VR->getStackFrame()->inTopFrame())
139139
return cast<ParmVarDecl>(VR->getDecl());
140140

141141
const SymbolicRegion *SR = MR->getSymbolicBase();

clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,8 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE,
7676
return;
7777

7878
// Global variables are fine.
79-
const MemRegion *RB = R->getBaseRegion();
80-
const MemSpaceRegion *RS = RB->getMemorySpace();
81-
if (isa<GlobalsSpaceRegion>(RS))
79+
const MemSpaceRegion *Space = R->getMemorySpace(C.getState());
80+
if (isa<GlobalsSpaceRegion>(Space))
8281
return;
8382

8483
// Handle _dispatch_once. In some versions of the OS X SDK we have the case
@@ -95,7 +94,7 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE,
9594
llvm::raw_svector_ostream os(S);
9695
bool SuggestStatic = false;
9796
os << "Call to '" << FName << "' uses";
98-
if (const VarRegion *VR = dyn_cast<VarRegion>(RB)) {
97+
if (const VarRegion *VR = dyn_cast<VarRegion>(R->getBaseRegion())) {
9998
const VarDecl *VD = VR->getDecl();
10099
// FIXME: These should have correct memory space and thus should be filtered
101100
// out earlier. This branch only fires when we're looking from a block,
@@ -117,9 +116,9 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE,
117116
if (IVR != R)
118117
os << " memory within";
119118
os << " the instance variable '" << IVR->getDecl()->getName() << '\'';
120-
} else if (isa<HeapSpaceRegion>(RS)) {
119+
} else if (isa<HeapSpaceRegion>(Space)) {
121120
os << " heap-allocated memory";
122-
} else if (isa<UnknownSpaceRegion>(RS)) {
121+
} else if (isa<UnknownSpaceRegion>(Space)) {
123122
// Presence of an IVar superregion has priority over this branch, because
124123
// ObjC objects are on the heap even if the core doesn't realize this.
125124
// Presence of a block variable base region has priority over this branch,

clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,8 @@ class MallocChecker
784784
bool IsALeakCheck = false) const;
785785
///@}
786786
static bool SummarizeValue(raw_ostream &os, SVal V);
787-
static bool SummarizeRegion(raw_ostream &os, const MemRegion *MR);
787+
static bool SummarizeRegion(ProgramStateRef State, raw_ostream &os,
788+
const MemRegion *MR);
788789

789790
void HandleNonHeapDealloc(CheckerContext &C, SVal ArgVal, SourceRange Range,
790791
const Expr *DeallocExpr,
@@ -2202,11 +2203,9 @@ MallocChecker::FreeMemAux(CheckerContext &C, const Expr *ArgExpr,
22022203
return nullptr;
22032204
}
22042205

2205-
const MemSpaceRegion *MS = R->getMemorySpace();
2206-
22072206
// Parameters, locals, statics, globals, and memory returned by
22082207
// __builtin_alloca() shouldn't be freed.
2209-
if (!isa<UnknownSpaceRegion, HeapSpaceRegion>(MS)) {
2208+
if (!R->hasMemorySpace<UnknownSpaceRegion, HeapSpaceRegion>(State)) {
22102209
// Regions returned by malloc() are represented by SymbolicRegion objects
22112210
// within HeapSpaceRegion. Of course, free() can work on memory allocated
22122211
// outside the current function, so UnknownSpaceRegion is also a
@@ -2384,7 +2383,7 @@ bool MallocChecker::SummarizeValue(raw_ostream &os, SVal V) {
23842383
return true;
23852384
}
23862385

2387-
bool MallocChecker::SummarizeRegion(raw_ostream &os,
2386+
bool MallocChecker::SummarizeRegion(ProgramStateRef State, raw_ostream &os,
23882387
const MemRegion *MR) {
23892388
switch (MR->getKind()) {
23902389
case MemRegion::FunctionCodeRegionKind: {
@@ -2403,7 +2402,7 @@ bool MallocChecker::SummarizeRegion(raw_ostream &os,
24032402
os << "a block";
24042403
return true;
24052404
default: {
2406-
const MemSpaceRegion *MS = MR->getMemorySpace();
2405+
const MemSpaceRegion *MS = MR->getMemorySpace(State);
24072406

24082407
if (isa<StackLocalsSpaceRegion>(MS)) {
24092408
const VarRegion *VR = dyn_cast<VarRegion>(MR);
@@ -2489,8 +2488,8 @@ void MallocChecker::HandleNonHeapDealloc(CheckerContext &C, SVal ArgVal,
24892488
os << "deallocator";
24902489

24912490
os << " is ";
2492-
bool Summarized = MR ? SummarizeRegion(os, MR)
2493-
: SummarizeValue(os, ArgVal);
2491+
bool Summarized =
2492+
MR ? SummarizeRegion(C.getState(), os, MR) : SummarizeValue(os, ArgVal);
24942493
if (Summarized)
24952494
os << ", which is not memory allocated by ";
24962495
else

0 commit comments

Comments
 (0)