PR for llvm/llvm-project#79175 (#80274)


Merged 3 commits on Feb 5, 2024
7 changes: 7 additions & 0 deletions llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -287,6 +287,10 @@ class AAQueryInfo {
/// store %l, ...
bool MayBeCrossIteration = false;

/// Whether alias analysis is allowed to use the dominator tree, for use by
/// passes that lazily update the DT while performing AA queries.
bool UseDominatorTree = true;

AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {}
};

@@ -668,6 +672,9 @@ class BatchAAResults {
void enableCrossIterationMode() {
AAQI.MayBeCrossIteration = true;
}

/// Disable the use of the dominator tree during alias analysis queries.
void disableDominatorTree() { AAQI.UseDominatorTree = false; }
};

/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
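The new AAQueryInfo flag and the BatchAAResults::disableDominatorTree() hook let a client opt out of dominator-tree-based reasoning for a whole batch of queries. A minimal sketch of the intended usage, assuming a hypothetical helper around the APIs above (only BatchAAResults and disableDominatorTree() come from this change):

// Illustrative only: batch several queries and opt out of DT-based reasoning.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"

using namespace llvm;

static AliasResult aliasWithoutDT(AAResults &AA, const MemoryLocation &LocA,
                                  const MemoryLocation &LocB) {
  BatchAAResults BatchAA(AA);
  // The caller updates the dominator tree lazily, so a cached DT may be
  // stale; tell BasicAA not to consult it for this batch of queries.
  BatchAA.disableDominatorTree();
  return BatchAA.alias(LocA, LocB);
}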
14 changes: 10 additions & 4 deletions llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase {
const Function &F;
const TargetLibraryInfo &TLI;
AssumptionCache &AC;
DominatorTree *DT;
/// Use getDT() instead of accessing this member directly, in order to
/// respect the AAQI.UseDominatorTree option.
DominatorTree *DT_;

DominatorTree *getDT(const AAQueryInfo &AAQI) const {
return AAQI.UseDominatorTree ? DT_ : nullptr;
}

public:
BasicAAResult(const DataLayout &DL, const Function &F,
const TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree *DT = nullptr)
: DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {}
: DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {}

BasicAAResult(const BasicAAResult &Arg)
: AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC),
DT(Arg.DT) {}
DT_(Arg.DT_) {}
BasicAAResult(BasicAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI),
AC(Arg.AC), DT(Arg.DT) {}
AC(Arg.AC), DT_(Arg.DT_) {}

/// Handle invalidation events in the new pass manager.
bool invalidate(Function &Fn, const PreservedAnalyses &PA,
12 changes: 6 additions & 6 deletions llvm/include/llvm/Analysis/Loads.h
@@ -18,7 +18,7 @@

namespace llvm {

class AAResults;
class BatchAAResults;
class AssumptionCache;
class DataLayout;
class DominatorTree;
@@ -129,19 +129,19 @@ extern cl::opt<unsigned> DefMaxInstsToScan;
/// location in memory, as opposed to the value operand of a store.
///
/// \returns The found value, or nullptr if no value is found.
Value *FindAvailableLoadedValue(LoadInst *Load,
BasicBlock *ScanBB,
Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = DefMaxInstsToScan,
AAResults *AA = nullptr,
BatchAAResults *AA = nullptr,
bool *IsLoadCSE = nullptr,
unsigned *NumScanedInst = nullptr);

/// This overload provides a more efficient implementation of
/// FindAvailableLoadedValue() for the case where we are not interested in
/// finding the closest clobbering instruction if no available load is found.
/// This overload cannot be used to scan across multiple blocks.
Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
bool *IsLoadCSE,
unsigned MaxInstsToScan = DefMaxInstsToScan);

/// Scan backwards to see if we have the value of the given pointer available
@@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy,
bool AtLeastAtomic, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan, AAResults *AA,
unsigned MaxInstsToScan, BatchAAResults *AA,
bool *IsLoadCSE, unsigned *NumScanedInst);

/// Returns true if a pointer value \p A can be replace with another pointer
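With the signature change above, callers that used to pass an AAResults now have to supply a BatchAAResults. A minimal sketch of the simple overload under the new signature (the wrapper function is an illustrative assumption, mirroring the Lint and InstCombine updates below):

// Illustrative caller of the new BatchAAResults-based overload.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static Value *findAvailableValue(LoadInst *Load, AAResults &AA) {
  BatchAAResults BatchAA(AA); // batch interface now required by the API
  bool IsLoadCSE = false;
  return FindAvailableLoadedValue(Load, BatchAA, &IsLoadCSE);
}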
6 changes: 4 additions & 2 deletions llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -89,7 +89,7 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
// may be created without handles to some analyses and in that case don't
// depend on them.
if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
(DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
(DT_ && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
return true;

// Otherwise this analysis result remains valid.
@@ -1063,6 +1063,7 @@ AliasResult BasicAAResult::aliasGEP(
: AliasResult::MayAlias;
}

DominatorTree *DT = getDT(AAQI);
DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);

@@ -1556,6 +1557,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
const Value *HintO1 = getUnderlyingObject(Hint1);
const Value *HintO2 = getUnderlyingObject(Hint2);

DominatorTree *DT = getDT(AAQI);
auto ValidAssumeForPtrContext = [&](const Value *Ptr) {
if (const Instruction *PtrI = dyn_cast<Instruction>(Ptr)) {
return isValidAssumeForContext(Assume, PtrI, DT,
@@ -1735,7 +1737,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
if (!Inst || Inst->getParent()->isEntryBlock())
return true;

return isNotInCycle(Inst, DT, /*LI*/ nullptr);
return isNotInCycle(Inst, getDT(AAQI), /*LI*/ nullptr);
}

/// Computes the symbolic difference between two de-composed GEPs.
3 changes: 2 additions & 1 deletion llvm/lib/Analysis/Lint.cpp
@@ -657,11 +657,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
BasicBlock::iterator BBI = L->getIterator();
BasicBlock *BB = L->getParent();
SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
BatchAAResults BatchAA(*AA);
for (;;) {
if (!VisitedBlocks.insert(BB).second)
break;
if (Value *U =
FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA))
return findValueImpl(U, OffsetOk, Visited);
if (BBI != BB->begin())
break;
9 changes: 4 additions & 5 deletions llvm/lib/Analysis/Loads.cpp
@@ -450,11 +450,10 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
"to scan backward from a given instruction, when searching for "
"available loaded value"));

Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
BasicBlock *ScanBB,
Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
AAResults *AA, bool *IsLoad,
BatchAAResults *AA, bool *IsLoad,
unsigned *NumScanedInst) {
// Don't CSE load that is volatile or anything stronger than unordered.
if (!Load->isUnordered())
@@ -583,7 +582,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
Value *llvm::findAvailablePtrLoadStore(
const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic,
BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan,
AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;

@@ -664,7 +663,7 @@ Value *llvm::findAvailablePtrLoadStore(
return nullptr;
}

Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
bool *IsLoadCSE,
unsigned MaxInstsToScan) {
const DataLayout &DL = Load->getModule()->getDataLayout();
llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1032,7 +1032,8 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
bool IsLoadCSE = false;
if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
BatchAAResults BatchAA(*AA);
if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
if (IsLoadCSE)
combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);

13 changes: 8 additions & 5 deletions llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1260,8 +1260,11 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// the entry to its block.
BasicBlock::iterator BBIt(LoadI);
bool IsLoadCSE;
BatchAAResults BatchAA(*AA);
// The dominator tree is updated lazily and may not be valid at this point.
BatchAA.disableDominatorTree();
if (Value *AvailableVal = FindAvailableLoadedValue(
LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.

@@ -1322,9 +1325,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
AATags);
PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
PredBB, BBIt, DefMaxInstsToScan,
AA, &IsLoadCSE, &NumScanedInst);
PredAvailable = findAvailablePtrLoadStore(
Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
&BatchAA, &IsLoadCSE, &NumScanedInst);

// If PredBB has a single predecessor, continue scanning through the
// single predecessor.
@@ -1336,7 +1339,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
BBIt = SinglePredBB->end();
PredAvailable = findAvailablePtrLoadStore(
Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
(DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
(DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
&NumScanedInst);
}
}
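The JumpThreading change combines both pieces: one BatchAAResults is reused for the whole scan, with dominator-tree-based reasoning turned off because the pass applies DT updates lazily and the tree may not reflect the current CFG. A condensed, illustrative sketch of that pattern (not code from the patch):

// Illustrative condensation of the JumpThreading usage above.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static Value *scanForAvailableLoad(LoadInst *LoadI, AAResults &AA) {
  BatchAAResults BatchAA(AA);
  // The dominator tree is updated lazily and may not be valid at this point.
  BatchAA.disableDominatorTree();
  BasicBlock::iterator BBIt(LoadI);
  bool IsLoadCSE = false;
  return FindAvailableLoadedValue(LoadI, LoadI->getParent(), BBIt,
                                  DefMaxInstsToScan, &BatchAA, &IsLoadCSE);
}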
62 changes: 62 additions & 0 deletions llvm/test/Transforms/JumpThreading/pr79175.ll
@@ -0,0 +1,62 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes=jump-threading < %s | FileCheck %s

@f = external global i32

; Make sure the value of @f is reloaded prior to the final comparison.
define i32 @test(i64 %idx, i32 %val) {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: i64 [[IDX:%.*]], i32 [[VAL:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IDX]], 1
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[RETURN:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[F:%.*]] = load i32, ptr @f, align 4
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[F]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END_THREAD:%.*]], label [[COND_END:%.*]]
; CHECK: cond.end:
; CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[VAL]], 0
; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP_I]]
; CHECK-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]]
; CHECK: cond.end.thread:
; CHECK-NEXT: br label [[TMP0]]
; CHECK: 0:
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ]
; CHECK-NEXT: [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]]
; CHECK-NEXT: store i32 [[TMP1]], ptr [[F_IDX]], align 4
; CHECK-NEXT: [[F_RELOAD:%.*]] = load i32, ptr @f, align 4
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[F_RELOAD]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[RETURN2:%.*]], label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: ret i32 0
; CHECK: return2:
; CHECK-NEXT: ret i32 1
;
entry:
%cmp = icmp slt i64 %idx, 1
br i1 %cmp, label %for.body, label %return

for.body:
%f = load i32, ptr @f, align 4
%cmp1 = icmp eq i32 %f, 0
br i1 %cmp1, label %cond.end, label %cond.false

cond.false:
br label %cond.end

cond.end:
%phi = phi i32 [ %val, %cond.false ], [ 1, %for.body ]
%cmp.i = icmp sgt i32 %phi, 0
%sel = select i1 %cmp.i, i32 0, i32 %phi
%f.idx = getelementptr inbounds i32, ptr @f, i64 %idx
store i32 %sel, ptr %f.idx, align 4
%f.reload = load i32, ptr @f, align 4
%cmp3 = icmp slt i32 %f.reload, 1
br i1 %cmp3, label %return2, label %return

return:
ret i32 0

return2:
ret i32 1
}