Reland "[LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop #96752" #123616
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis

Author: David Sherwood (david-arm)

Changes: The last attempt failed a sanitiser build because we were creating a reference to a null Predicates pointer in isDereferenceableAndAlignedInLoop. This was exposed by the unit test IsDerefReadOnlyLoop in unittests/Analysis/LoadsTest.cpp. This reland fixes it by falling back on getConstantMaxBackedgeTakenCount if Predicates is null.

Patch is 39.45 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/123616.diff

5 Files Affected:
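For context, a minimal hypothetical sketch (the function and names are made up for illustration, not taken from the patch) of the kind of reverse loop this change targets:

// Hypothetical example of a reverse loop. With this change,
// isDereferenceableAndAlignedInLoop can compute the interval touched by the
// negative-step address recurrence for a[i], which is what the loop
// vectorizer uses to decide whether the load can run unconditionally
// instead of being predicated.
int sum_reverse(const int *a, int n) {
  int sum = 0;
  for (int i = n - 1; i >= 0; --i)
    sum += a[i];
  return sum;
}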
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 31374a128856c7..6fc6ca14d08895 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -853,6 +853,25 @@ bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType = true);
+/// Calculate Start and End points of memory access.
+/// Let's assume A is the first access and B is a memory access on N-th loop
+/// iteration. Then B is calculated as:
+/// B = A + Step*N .
+/// Step value may be positive or negative.
+/// N is a calculated back-edge taken count:
+/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
+/// Start and End points are calculated in the following way:
+/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
+/// where SizeOfElt is the size of single memory access in bytes.
+///
+/// There is no conflict when the intervals are disjoint:
+/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
+std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
+ const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
+ ScalarEvolution *SE,
+ DenseMap<std::pair<const SCEV *, Type *>,
+ std::pair<const SCEV *, const SCEV *>> *PointerBounds);
+
class LoopAccessInfoManager {
/// The cache.
DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 11ccfa33821cad..7c704efd1011bf 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -277,84 +278,89 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
bool llvm::isDereferenceableAndAlignedInLoop(
LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
+ const Align Alignment = LI->getAlign();
auto &DL = LI->getDataLayout();
Value *Ptr = LI->getPointerOperand();
-
APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
DL.getTypeStoreSize(LI->getType()).getFixedValue());
- const Align Alignment = LI->getAlign();
-
- Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
// If given a uniform (i.e. non-varying) address, see if we can prove the
// access is safe within the loop w/o needing predication.
if (L->isLoopInvariant(Ptr))
- return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
- HeaderFirstNonPHI, AC, &DT);
+ return isDereferenceableAndAlignedPointer(
+ Ptr, Alignment, EltSize, DL, L->getHeader()->getFirstNonPHI(), AC, &DT);
+
+ const SCEV *PtrScev = SE.getSCEV(Ptr);
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(PtrScev);
- // Otherwise, check to see if we have a repeating access pattern where we can
- // prove that all accesses are well aligned and dereferenceable.
- auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+ // Check to see if we have a repeating access pattern and it's possible
+ // to prove all accesses are well aligned.
if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
return false;
- auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
+
+ auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
if (!Step)
return false;
- auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
- if (!TC)
+ // For the moment, restrict ourselves to the case where the access size is a
+ // multiple of the requested alignment and the base is aligned.
+ // TODO: generalize if a case found which warrants
+ if (EltSize.urem(Alignment.value()) != 0)
return false;
// TODO: Handle overlapping accesses.
- // We should be computing AccessSize as (TC - 1) * Step + EltSize.
- if (EltSize.sgt(Step->getAPInt()))
+ if (EltSize.ugt(Step->getAPInt().abs()))
+ return false;
+
+ const SCEV *MaxBECount =
+ Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
+ : SE.getConstantMaxBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ return false;
+
+ const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
+ L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr);
+ if (isa<SCEVCouldNotCompute>(AccessStart) ||
+ isa<SCEVCouldNotCompute>(AccessEnd))
return false;
- // Compute the total access size for access patterns with unit stride and
- // patterns with gaps. For patterns with unit stride, Step and EltSize are the
- // same.
- // For patterns with gaps (i.e. non unit stride), we are
- // accessing EltSize bytes at every Step.
- APInt AccessSize = TC * Step->getAPInt();
+ // Try to get the access size.
+ const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
+ APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff);
- assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
- "implied by addrec definition");
Value *Base = nullptr;
- if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) {
- Base = StartS->getValue();
- } else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) {
- // Handle (NewBase + offset) as start value.
- const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0));
- const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1));
- if (StartS->getNumOperands() == 2 && Offset && NewBase) {
- // The following code below assumes the offset is unsigned, but GEP
- // offsets are treated as signed so we can end up with a signed value
- // here too. For example, suppose the initial PHI value is (i8 255),
- // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
- if (Offset->getAPInt().isNegative())
- return false;
+ APInt AccessSize;
+ if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) {
+ Base = NewBase->getValue();
+ AccessSize = MaxPtrDiff;
+ } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
+ if (MinAdd->getNumOperands() != 2)
+ return false;
- // For the moment, restrict ourselves to the case where the offset is a
- // multiple of the requested alignment and the base is aligned.
- // TODO: generalize if a case found which warrants
- if (Offset->getAPInt().urem(Alignment.value()) != 0)
- return false;
- Base = NewBase->getValue();
- bool Overflow = false;
- AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow);
- if (Overflow)
- return false;
- }
- }
+ const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0));
+ const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1));
+ if (!Offset || !NewBase)
+ return false;
- if (!Base)
- return false;
+ // The following code below assumes the offset is unsigned, but GEP
+ // offsets are treated as signed so we can end up with a signed value
+ // here too. For example, suppose the initial PHI value is (i8 255),
+ // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
+ if (Offset->getAPInt().isNegative())
+ return false;
- // For the moment, restrict ourselves to the case where the access size is a
- // multiple of the requested alignment and the base is aligned.
- // TODO: generalize if a case found which warrants
- if (EltSize.urem(Alignment.value()) != 0)
+ // For the moment, restrict ourselves to the case where the offset is a
+ // multiple of the requested alignment and the base is aligned.
+ // TODO: generalize if a case found which warrants
+ if (Offset->getAPInt().urem(Alignment.value()) != 0)
+ return false;
+
+ AccessSize = MaxPtrDiff + Offset->getAPInt();
+ Base = NewBase->getValue();
+ } else
return false;
+
+ Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
HeaderFirstNonPHI, AC, &DT);
}
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2a68979add666d..11e0a221fc8878 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -190,31 +190,20 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
Members.push_back(Index);
}
-/// Calculate Start and End points of memory access.
-/// Let's assume A is the first access and B is a memory access on N-th loop
-/// iteration. Then B is calculated as:
-/// B = A + Step*N .
-/// Step value may be positive or negative.
-/// N is a calculated back-edge taken count:
-/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
-/// Start and End points are calculated in the following way:
-/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
-/// where SizeOfElt is the size of single memory access in bytes.
-///
-/// There is no conflict when the intervals are disjoint:
-/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
- const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy,
- PredicatedScalarEvolution &PSE,
+std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
+ const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
+ ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
- std::pair<const SCEV *, const SCEV *>> &PointerBounds) {
- ScalarEvolution *SE = PSE.getSE();
-
- auto [Iter, Ins] = PointerBounds.insert(
- {{PtrExpr, AccessTy},
- {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
- if (!Ins)
- return Iter->second;
+ std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
+ std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
+ if (PointerBounds) {
+ auto [Iter, Ins] = PointerBounds->insert(
+ {{PtrExpr, AccessTy},
+ {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
+ if (!Ins)
+ return Iter->second;
+ PtrBoundsPair = &Iter->second;
+ }
const SCEV *ScStart;
const SCEV *ScEnd;
@@ -222,10 +211,8 @@ static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
if (SE->isLoopInvariant(PtrExpr, Lp)) {
ScStart = ScEnd = PtrExpr;
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
- const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount();
-
ScStart = AR->getStart();
- ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
const SCEV *Step = AR->getStepRecurrence(*SE);
// For expressions with negative step, the upper bound is ScStart and the
@@ -244,7 +231,7 @@ static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
- assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant");
+ assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
// Add the size of the pointed element to ScEnd.
auto &DL = Lp->getHeader()->getDataLayout();
@@ -252,8 +239,10 @@ static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
- Iter->second = {ScStart, ScEnd};
- return Iter->second;
+ std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
+ if (PointerBounds)
+ *PtrBoundsPair = Res;
+ return Res;
}
/// Calculate Start and End points of memory access using
@@ -263,8 +252,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
unsigned DepSetId, unsigned ASId,
PredicatedScalarEvolution &PSE,
bool NeedsFreeze) {
+ const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
- Lp, PtrExpr, AccessTy, PSE, DC.getPointerBounds());
+ Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -1938,10 +1928,11 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// required for correctness.
if (SE.isLoopInvariant(Src, InnermostLoop) ||
SE.isLoopInvariant(Sink, InnermostLoop)) {
- const auto &[SrcStart_, SrcEnd_] =
- getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
- const auto &[SinkStart_, SinkEnd_] =
- getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
+ const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
+ const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
+ InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
+ const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
+ InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
!isa<SCEVCouldNotCompute>(SinkStart_) &&
diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 1433e48690bc60..3e50ee42866b9b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -2920,8 +2920,8 @@ loop_exit:
ret i32 %accum.next
}
-define i32 @neg_test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) {
-; CHECK-LABEL: @neg_test_non_unit_stride_off_by_four_bytes(
+define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) {
+; CHECK-LABEL: @test_non_unit_stride_off_by_four_bytes(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [103 x i32], align 4
; CHECK-NEXT: call void @init(ptr [[ALLOCA]])
@@ -2929,11 +2929,11 @@ define i32 @neg_test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base)
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE33:%.*]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP144:%.*]], [[PRED_LOAD_CONTINUE33]] ]
-; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP145:%.*]], [[PRED_LOAD_CONTINUE33]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP146:%.*]], [[PRED_LOAD_CONTINUE33]] ]
-; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP147:%.*]], [[PRED_LOAD_CONTINUE33]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP113:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
@@ -2999,170 +2999,74 @@ define i32 @neg_test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base)
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0
-; CHECK-NEXT: br i1 [[TMP64]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
-; CHECK: pred.load.if:
-; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4
-; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
-; CHECK: pred.load.continue:
-; CHECK-NEXT: [[TMP68:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP67]], [[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP69:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1
-; CHECK-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
-; CHECK: pred.load.if4:
-; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4
-; CHECK-NEXT: [[TMP72:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP71]], i32 1
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE5]]
-; CHECK: pred.load.continue5:
-; CHECK-NEXT: [[TMP73:%.*]] = phi <4 x i32> [ [[TMP68]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP72]], [[PRED_LOAD_IF4]] ]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2
-; CHECK-NEXT: br i1 [[TMP74]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
-; CHECK: pred.load.if6:
-; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4
-; CHECK-NEXT: [[TMP77:%.*]] = insertelement <4 x i32> [[TMP73]], i32 [[TMP76]], i32 2
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE7]]
-; CHECK: pred.load.continue7:
-; CHECK-NEXT: [[TMP78:%.*]] = phi <4 x i32> [ [[TMP73]], [[PRED_LOAD_CONTINUE5]] ], [ [[TMP77]], [[PRED_LOAD_IF6]] ]
-; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3
-; CHECK-NEXT: br i1 [[TMP79]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9:%.*]]
-; CHECK: pred.load.if8:
-; CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4
-; CHECK-NEXT: [[TMP82:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP81]], i32 3
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE9]]
-; CHECK: pred.load.continue9:
-; CHECK-NEXT: [[TMP83:%.*]] = phi <4 x i32> [ [[TMP78]], [[PRED_LOAD_CONTINUE7]] ], [ [[TMP82]], [[PRED_LOAD_IF8]] ]
-; CHECK-NEXT: [[TMP84:%.*]] = extractelement <4 x i1> [[TMP47]], i32 0
-; CHECK-NEXT: br i1 [[TMP84]], label [[PRED_LOAD_IF10:%.*]], label [[PRED_LOAD_CONTINUE11:%.*]]
-; CHECK: pred.load.if10:
-; CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4
-; CHECK-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> poison, i32 [[TMP86]], i32 0
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE11]]
-; CHECK: pred.load.continue11:
-; CHECK-NEXT: [[TMP88:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP87]], [[PRED_LOAD_IF10]] ]
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <4 x i1> [[TMP47]], i32 1
-; CHECK-NEXT: br i1 [[TMP89]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
-; CHECK: pred.load.if12:
-; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP90]], align 4
-; CHECK-NEXT: [[TMP92:%.*]] = insertelement <4 x i32> [[TMP88]], i32 [[TMP91]], i32 1
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE13]]
-; CHECK: pred.load.continue13:
-; CHECK-NEXT: [[...
[truncated]
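To make the Start/End interval documented on the new getStartAndEndForAccess declaration concrete, here is a small standalone sketch with made-up values for a negative-step (reverse) access; it mirrors the formula Start = UMIN(A, B), End = UMAX(A, B) + SizeOfElt from the comment in the diff above.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical values: A is the first access, Step the per-iteration
  // stride in bytes (negative for a reverse loop), N the back-edge taken
  // count and SizeOfElt the size of a single access in bytes.
  int64_t A = 0x1000;
  int64_t Step = -4;
  int64_t N = 7;
  int64_t SizeOfElt = 4;

  int64_t B = A + Step * N;                  // last access     = 0xFE4
  int64_t Start = std::min(A, B);            // UMIN(A, B)      = 0xFE4
  int64_t End = std::max(A, B) + SizeOfElt;  // UMAX(A, B) + 4  = 0x1004

  // Two pointers do not conflict when their intervals are disjoint:
  //   NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
  assert(End - Start == 32 && "8 accesses of 4 bytes span 32 bytes");
  return 0;
}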
LGTM, thanks!
Reland "[LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop llvm#96752": The last attempt failed a sanitiser build because we were creating a reference to a null Predicates pointer in isDereferenceableAndAlignedInLoop. This was exposed by the unit test IsDerefReadOnlyLoop in unittests/Analysis/LoadsTest.cpp. I fixed this by falling back on getConstantMaxBackedgeTakenCount if Predicates is null.
Rebase + deal with conflict
✅ With the latest revision this PR passed the C/C++ code formatter.
The last attempt failed a sanitiser build because we were creating a reference to a null Predicates pointer in isDereferenceableAndAlignedInLoop. This was exposed by the unit test IsDerefReadOnlyLoop in unittests/Analysis/LoadsTest.cpp. I fixed this by falling back on getConstantMaxBackedgeTakenCount if Predicates is null - see line 316 in llvm/lib/Analysis/Loads.cpp. There are no other changes.