diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 639070c07897b..224c936bf161e 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -88,12 +88,6 @@ bool isDereferenceableAndAlignedInLoop(
     AssumptionCache *AC = nullptr,
     SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
 
-/// Return true if the loop \p L cannot fault on any iteration and only
-/// contains read-only memory accesses.
-bool isDereferenceableReadOnlyLoop(
-    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
-    SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
-
 /// Return true if we know that executing a load from this value cannot trap.
 ///
 /// If DT and ScanFrom are specified this method performs context-sensitive
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index e959d93b57275..6c0a3846cd946 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -382,11 +382,18 @@ class LoopVectorizationLegality {
   const LoopAccessInfo *getLAI() const { return LAI; }
 
   bool isSafeForAnyVectorWidth() const {
-    return LAI->getDepChecker().isSafeForAnyVectorWidth();
+    return LAI->getDepChecker().isSafeForAnyVectorWidth() &&
+           (!hasUncountableEarlyExit() || !getNumPotentiallyFaultingPointers());
   }
 
   uint64_t getMaxSafeVectorWidthInBits() const {
-    return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
+    uint64_t MaxSafeVectorWidth =
+        LAI->getDepChecker().getMaxSafeVectorWidthInBits();
+    // The legalizer bails out if getMinPageSize does not return a value.
+    if (hasUncountableEarlyExit() && getNumPotentiallyFaultingPointers())
+      MaxSafeVectorWidth =
+          std::min(MaxSafeVectorWidth, uint64_t(*TTI->getMinPageSize()) * 8);
+    return MaxSafeVectorWidth;
   }
 
   /// Returns true if the loop has exactly one uncountable early exit, i.e. an
@@ -419,6 +426,19 @@ class LoopVectorizationLegality {
   unsigned getNumStores() const { return LAI->getNumStores(); }
   unsigned getNumLoads() const { return LAI->getNumLoads(); }
 
+  /// Return the number of pointers in the loop that could potentially fault in
+  /// a loop with uncountable early exits.
+  unsigned getNumPotentiallyFaultingPointers() const {
+    return PotentiallyFaultingPtrs.size();
+  }
+
+  /// Return a vector of all potentially faulting pointers in a loop with
+  /// uncountable early exits.
+  ArrayRef<std::pair<const SCEV *, Type *>>
+  getPotentiallyFaultingPointers() const {
+    return PotentiallyFaultingPtrs;
+  }
+
   /// Returns a HistogramInfo* for the given instruction if it was determined
   /// to be part of a load -> update -> store sequence where multiple lanes
   /// may be working on the same memory address.
@@ -524,6 +544,11 @@ class LoopVectorizationLegality {
   /// additional cases safely.
   bool isVectorizableEarlyExitLoop();
 
+  /// Returns true if all loads in the loop contained in \p Loads can be
+  /// analyzed as potentially faulting. Any loads that may fault are added to
+  /// the member variable PotentiallyFaultingPtrs.
+  bool analyzePotentiallyFaultingLoads(SmallVectorImpl<LoadInst *> &Loads);
+
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -642,6 +667,10 @@ class LoopVectorizationLegality {
   /// Keep track of the loop edge to an uncountable exit, comprising a pair
   /// of (Exiting, Exit) blocks, if there is exactly one early exit.
   std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
+
+  /// Keep a record of all potentially faulting pointers in loops with
+  /// uncountable early exits.
+  SmallVector<std::pair<const SCEV *, Type *>, 4> PotentiallyFaultingPtrs;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index b461c41d29e84..304bdcd1fba25 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -816,18 +816,3 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
 
   return isPointerAlwaysReplaceable(From, To, DL);
 }
-
-bool llvm::isDereferenceableReadOnlyLoop(
-    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
-    SmallVectorImpl<const SCEVPredicate *> *Predicates) {
-  for (BasicBlock *BB : L->blocks()) {
-    for (Instruction &I : *BB) {
-      if (auto *LI = dyn_cast<LoadInst>(&I)) {
-        if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
-          return false;
-      } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
-        return false;
-    }
-  }
-  return true;
-}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 420cbc5384ce4..daf15646ee45a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1602,6 +1602,46 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
   return Result;
 }
 
+bool LoopVectorizationLegality::analyzePotentiallyFaultingLoads(
+    SmallVectorImpl<LoadInst *> &Loads) {
+  LLVM_DEBUG(dbgs() << "LV: Looking for potentially faulting loads in loop "
+                       "with uncountable early exit:\n");
+  for (LoadInst *LI : Loads) {
+    LLVM_DEBUG(dbgs() << "LV: Load: " << *LI << '\n');
+    if (LI->getPointerAddressSpace())
+      return false;
+
+    Value *Ptr = LI->getPointerOperand();
+    const SCEV *PtrExpr = PSE.getSCEV(Ptr);
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
+    // TODO: Deal with loop invariant pointers.
+    // NOTE: The reasoning below is only safe if the load executes at least
+    // once.
+    if (!AR || AR->getLoop() != TheLoop || !AR->isAffine())
+      return false;
+    auto Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*PSE.getSE()));
+    if (!Step)
+      return false;
+    const SCEV *Start = AR->getStart();
+
+    // Make sure the step is positive and matches the object size in memory.
+    // TODO: Extend this to cover more cases.
+    auto &DL = LI->getDataLayout();
+    APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+                  DL.getTypeStoreSize(LI->getType()).getFixedValue());
+
+    // Also discard element sizes that are not a power of 2, since the loop
+    // vectorizer can only perform loop versioning with pointer alignment
+    // checks for vector loads that are power-of-2 in size.
+    if (EltSize != Step->getAPInt() || !EltSize.isPowerOf2())
+      return false;
+
+    LLVM_DEBUG(dbgs() << "LV: SCEV for Load Ptr: " << *Start << '\n');
+    PotentiallyFaultingPtrs.push_back({Start, LI->getType()});
+  }
+  return true;
+}
+
 bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   BasicBlock *LatchBB = TheLoop->getLoopLatch();
   if (!LatchBB) {
@@ -1706,6 +1746,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
   };
 
+  Predicates.clear();
+  SmallVector<LoadInst *, 4> NonDerefLoads;
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
       if (I.mayWriteToMemory()) {
@@ -1715,30 +1757,52 @@
             "Cannot vectorize early exit loop with writes to memory",
             "WritesInEarlyExitLoop", ORE, TheLoop);
         return false;
-      } else if (!IsSafeOperation(&I)) {
+      } else if (I.mayThrow() || !IsSafeOperation(&I)) {
        reportVectorizationFailure("Early exit loop contains operations that "
                                   "cannot be speculatively executed",
                                   "UnsafeOperationsEarlyExitLoop", ORE,
                                   TheLoop);
        return false;
+      } else if (I.mayReadFromMemory()) {
+        auto *LI = dyn_cast<LoadInst>(&I);
+        bool UnsafeRead = false;
+        if (!LI)
+          UnsafeRead = true;
+        else if (!isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(),
+                                                    *DT, AC, &Predicates)) {
+          if (LI->getParent() != TheLoop->getHeader())
+            UnsafeRead = true;
+          else
+            NonDerefLoads.push_back(LI);
+        }
+
+        if (UnsafeRead) {
+          reportVectorizationFailure(
+              "Loop may fault",
+              "Cannot vectorize potentially faulting early exit loop",
+              "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+          return false;
+        }
       }
     }
 
+  if (!NonDerefLoads.empty()) {
+    if (!TTI->getMinPageSize() ||
+        !analyzePotentiallyFaultingLoads(NonDerefLoads)) {
+      PotentiallyFaultingPtrs.clear();
+      reportVectorizationFailure(
+          "Loop may fault",
+          "Cannot vectorize potentially faulting early exit loop",
+          "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << "We can vectorize the loop with runtime checks.\n");
+  }
+
   // The vectoriser cannot handle loads that occur after the early exit block.
   assert(LatchBB->getUniquePredecessor() == SingleUncountableEdge->first &&
          "Expected latch predecessor to be the early exiting block");
 
-  // TODO: Handle loops that may fault.
-  Predicates.clear();
-  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
-                                     &Predicates)) {
-    reportVectorizationFailure(
-        "Loop may fault",
-        "Cannot vectorize potentially faulting early exit loop",
-        "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
-    return false;
-  }
-
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
       PSE.getSymbolicMaxBackedgeTakenCount();
   // Since we have an exact exit count for the latch and the early exit
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e8a5db28ea0a4..7f9ffffc16efe 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -401,6 +401,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
     cl::desc(
         "Enable vectorization of early exit loops with uncountable exits."));
 
+static cl::opt<unsigned> MaxNumPotentiallyFaultingPointers(
+    "max-num-faulting-pointers", cl::init(0), cl::Hidden,
+    cl::desc(
+        "The maximum number of potentially faulting pointers we permit when "
+        "vectorizing loops with uncountable exits."));
+
 // Likelyhood of bypassing the vectorized loop because assumptions about SCEV
 // variables not overflowing do not hold. See `emitSCEVChecks`.
 static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127};
@@ -2163,6 +2169,28 @@ class GeneratedRTChecks {
 };
 } // namespace
 
+static void
+addPointerAlignmentChecks(ArrayRef<std::pair<const SCEV *, Type *>> Ptrs,
+                          Function *F, PredicatedScalarEvolution &PSE,
+                          TargetTransformInfo *TTI, ElementCount VF,
+                          unsigned IC) {
+  ScalarEvolution *SE = PSE.getSE();
+  const DataLayout &DL = SE->getDataLayout();
+
+  for (auto Ptr : Ptrs) {
+    Type *PtrIntType = DL.getIntPtrType(Ptr.first->getType());
+    APInt EltSize(PtrIntType->getScalarSizeInBits(),
+                  DL.getTypeStoreSize(Ptr.second).getFixedValue());
+    const SCEV *Start = SE->getPtrToIntExpr(Ptr.first, PtrIntType);
+    const SCEV *ScevEC = SE->getElementCount(PtrIntType, VF * IC);
+    const SCEV *Align =
+        SE->getMulExpr(ScevEC, SE->getConstant(EltSize),
+                       (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
+    const SCEV *Rem = SE->getURemExpr(Start, Align);
+    PSE.addPredicate(*SE->getEqualPredicate(Rem, SE->getZero(PtrIntType)));
+  }
+}
+
 static bool useActiveLaneMask(TailFoldingStyle Style) {
   return Style == TailFoldingStyle::Data ||
          Style == TailFoldingStyle::DataAndControlFlow ||
@@ -3842,6 +3870,15 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
     return false;
   }
 
+  if (Legal->hasUncountableEarlyExit() &&
+      Legal->getNumPotentiallyFaultingPointers() &&
+      !TTI.isVScaleKnownToBeAPowerOfTwo()) {
+    reportVectorizationInfo("Cannot vectorize potentially faulting early exit "
+                            "loop with scalable vectors.",
+                            "ScalableVFUnfeasible", ORE, TheLoop);
+    return false;
+  }
+
   IsScalableVectorizationAllowed = true;
   return true;
 }
@@ -10508,11 +10545,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     return false;
   }
 
-  if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) {
-    reportVectorizationFailure("Auto-vectorization of loops with uncountable "
-                               "early exit is not enabled",
-                               "UncountableEarlyExitLoopsDisabled", ORE, L);
-    return false;
+  if (LVL.hasUncountableEarlyExit()) {
+    if (!EnableEarlyExitVectorization) {
+      reportVectorizationFailure("Auto-vectorization of loops with uncountable "
+                                 "early exit is not enabled",
+                                 "UncountableEarlyExitLoopsDisabled", ORE, L);
+      return false;
+    }
+
+    unsigned NumPotentiallyFaultingPointers =
+        LVL.getNumPotentiallyFaultingPointers();
+    if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
+      reportVectorizationFailure("Not worth vectorizing loop with uncountable "
+                                 "early exit, due to number of potentially "
+                                 "faulting loads",
+                                 "UncountableEarlyExitMayFault", ORE, L);
+      return false;
+    } else if (NumPotentiallyFaultingPointers)
+      LLVM_DEBUG(dbgs() << "LV: Need to version early-exit vector loop with "
+                        << "pointer alignment checks.\n");
   }
 
   // Entrance to the VPlan-native vectorization path. Outer loops are processed
@@ -10663,8 +10714,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   unsigned SelectedIC = std::max(IC, UserIC);
   // Optimistically generate runtime checks if they are needed. Drop them if
   // they turn out to not be profitable.
-  if (VF.Width.isVector() || SelectedIC > 1)
+  if (VF.Width.isVector() || SelectedIC > 1) {
+    if (LVL.getNumPotentiallyFaultingPointers()) {
+      assert(!CM.foldTailWithEVL() &&
+             "Explicit vector length unsupported for early exit loops and "
+             "potentially faulting loads");
+      addPointerAlignmentChecks(LVL.getPotentiallyFaultingPointers(), F, PSE,
+                                TTI, VF.Width, SelectedIC);
+    }
     Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+  }
 
   // Check if it is profitable to vectorize with runtime checks.
bool ForceVectorization = diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll new file mode 100644 index 0000000000000..2163df92c83ad --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll @@ -0,0 +1,954 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 5 +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,MAX1 +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=2 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,MAX2 + +target triple = "aarch64-unknown-linux-gnu" + +declare void @init_mem(ptr, i64); + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP12]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 3, [[N_VEC]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]] +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq [[WIDE_LOAD]], splat (i8 3) +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP19]], splat (i1 true) +; CHECK-NEXT: [[TMP21:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP20]]) +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] +; CHECK-NEXT: [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]] +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], 
!llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_SPLIT]]: +; CHECK-NEXT: br i1 [[TMP21]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[VECTOR_EARLY_EXIT]]: +; CHECK-NEXT: br label %[[LOOP_END]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP15]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 { +; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas( +; MAX1-SAME: ) #[[ATTR0]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4 +; MAX1-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4 +; MAX1-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; MAX1-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas( +; MAX2-SAME: ) #[[ATTR0]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4 +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: [[P2:%.*]] = 
alloca [42 x i8], align 4 +; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64 +; MAX2-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; MAX2-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; MAX2-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[P11]], 3 +; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3 +; MAX2-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[P22]], 3 +; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]] +; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]] +; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4 +; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]] +; MAX2-NEXT: br i1 true, label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 16 +; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP18]] +; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; MAX2-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 16 +; MAX2-NEXT: [[TMP21:%.*]] = add i64 3, [[N_VEC]] +; MAX2-NEXT: [[TMP22:%.*]] = call @llvm.stepvector.nxv16i64() +; MAX2-NEXT: [[TMP23:%.*]] = mul [[TMP22]], splat (i64 1) +; MAX2-NEXT: [[INDUCTION:%.*]] = add splat (i64 3), [[TMP23]] +; MAX2-NEXT: [[TMP24:%.*]] = mul i64 1, [[TMP20]] +; MAX2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP24]], i64 0 +; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: [[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX3:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT5:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX3]] +; MAX2-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP25]] +; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP27]], align 1 +; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP25]] +; MAX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP29]], align 1 +; MAX2-NEXT: [[TMP30:%.*]] = icmp eq [[WIDE_LOAD]], [[WIDE_LOAD4]] +; MAX2-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], [[TMP20]] +; MAX2-NEXT: [[TMP31:%.*]] = xor [[TMP30]], splat (i1 true) +; MAX2-NEXT: [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP31]]) +; MAX2-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC]] +; MAX2-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; MAX2-NEXT: [[TMP34:%.*]] = or i1 [[TMP32]], [[TMP33]] +; 
MAX2-NEXT: br i1 [[TMP34]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP32]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 true) +; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]] +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [42 x i8] + %p2 = alloca [42 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 { +; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( +; MAX1-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; MAX1: 
[[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( +; MAX2-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64 +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3 +; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0 +; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3 +; MAX2-NEXT: [[TMP12:%.*]] = add nuw i64 [[P22]], 3 +; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]] +; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]] +; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4 +; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]] +; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0 +; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]] +; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 +; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]] +; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16 +; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]] +; MAX2-NEXT: [[TMP23:%.*]] = call @llvm.stepvector.nxv16i64() +; MAX2-NEXT: [[TMP24:%.*]] = mul [[TMP23]], splat (i64 1) +; MAX2-NEXT: [[INDUCTION:%.*]] = add splat (i64 3), [[TMP24]] +; MAX2-NEXT: [[TMP25:%.*]] = mul i64 1, [[TMP21]] +; MAX2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP25]], i64 0 +; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: [[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]] +; MAX2-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP26]] +; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP28]], align 1 +; MAX2-NEXT: [[TMP29:%.*]] = getelementptr 
inbounds i8, ptr [[P2]], i64 [[TMP26]] +; MAX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP30]], align 1 +; MAX2-NEXT: [[TMP31:%.*]] = icmp eq [[WIDE_LOAD]], [[WIDE_LOAD5]] +; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]] +; MAX2-NEXT: [[TMP32:%.*]] = xor [[TMP31]], splat (i1 true) +; MAX2-NEXT: [[TMP33:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP32]]) +; MAX2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] +; MAX2-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; MAX2-NEXT: [[TMP35:%.*]] = or i1 [[TMP33]], [[TMP34]] +; MAX2-NEXT: br i1 [[TMP35]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP33]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) +; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]] +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP7:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 { +; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( +; MAX1-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; MAX1-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( +; MAX2-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64 +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3 +; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0 +; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3 +; MAX2-NEXT: [[TMP12:%.*]] = add nuw i64 [[P22]], 3 +; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]] +; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]] +; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4 +; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]] +; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0 +; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]] +; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 +; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]] +; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16 +; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]] +; MAX2-NEXT: [[TMP23:%.*]] = call @llvm.stepvector.nxv16i64() +; MAX2-NEXT: [[TMP24:%.*]] = mul [[TMP23]], splat (i64 1) +; MAX2-NEXT: [[INDUCTION:%.*]] = add splat (i64 3), [[TMP24]] +; MAX2-NEXT: [[TMP25:%.*]] = mul i64 1, [[TMP21]] +; MAX2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP25]], i64 0 +; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: [[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; 
MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]] +; MAX2-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP26]] +; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP28]], align 1 +; MAX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP26]] +; MAX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP30]], align 1 +; MAX2-NEXT: [[TMP31:%.*]] = icmp eq [[WIDE_LOAD]], [[WIDE_LOAD5]] +; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]] +; MAX2-NEXT: [[TMP32:%.*]] = xor [[TMP31]], splat (i1 true) +; MAX2-NEXT: [[TMP33:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP32]]) +; MAX2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] +; MAX2-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; MAX2-NEXT: [[TMP35:%.*]] = or i1 [[TMP33]], [[TMP34]] +; MAX2-NEXT: br i1 [[TMP35]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP33]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) +; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]] +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP9:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 
@same_exit_block_no_live_outs_one_faulting_ptr_stride2(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[STRIDE2:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[STRIDE2]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %stride2 = mul i64 %index, 2 + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %stride2 + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(ptr %p1, i64 %stride) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown( +; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[INDEX]], [[STRIDE]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[MUL]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %mul = mul i64 %index, %stride + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %mul + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) #1 { +; MAX1-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range( +; MAX1-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; 
MAX1-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX1: [[VECTOR_SCEVCHECK]]: +; MAX1-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i4 +; MAX1-NEXT: [[TMP1:%.*]] = add i4 [[TMP0]], 3 +; MAX1-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i64 +; MAX1-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0 +; MAX1-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX1: [[VECTOR_PH]]: +; MAX1-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX1: [[VECTOR_BODY]]: +; MAX1-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ] +; MAX1-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]] +; MAX1-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]] +; MAX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 +; MAX1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 +; MAX1-NEXT: [[TMP6:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3) +; MAX1-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 16 +; MAX1-NEXT: [[TMP7:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true) +; MAX1-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP7]]) +; MAX1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; MAX1-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; MAX1-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; MAX1: [[MIDDLE_SPLIT]]: +; MAX1-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX1: [[MIDDLE_BLOCK]]: +; MAX1-NEXT: br i1 true, label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX1: [[VECTOR_EARLY_EXIT]]: +; MAX1-NEXT: br label %[[LOOP_END]] +; MAX1: [[SCALAR_PH]]: +; MAX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range( +; MAX2-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i4 +; MAX2-NEXT: [[TMP1:%.*]] = add i4 [[TMP0]], 3 +; MAX2-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i64 +; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0 +; MAX2-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: 
[[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]] +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]] +; MAX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 +; MAX2-NEXT: [[TMP6:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3) +; MAX2-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 16 +; MAX2-NEXT: [[TMP7:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true) +; MAX2-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP7]]) +; MAX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; MAX2-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; MAX2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: br i1 true, label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP11:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; 
CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 4 + %cmp3 = icmp eq i32 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 { +; MAX1-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs( +; MAX1-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs( +; MAX2-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64 +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3 +; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0 +; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3 +; MAX2-NEXT: [[TMP12:%.*]] = add nuw i64 [[P22]], 3 +; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]] +; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]] +; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4 +; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]] +; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0 +; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]] 
+; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; MAX2: [[VECTOR_PH]]:
+; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16
+; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]]
+; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16
+; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]]
+; MAX2-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX2: [[VECTOR_BODY]]:
+; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]]
+; MAX2-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 0
+; MAX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP23]]
+; MAX2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP24]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP25]], align 1
+; MAX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP23]]
+; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP27]], align 1
+; MAX2-NEXT: [[TMP28:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD5]]
+; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]]
+; MAX2-NEXT: [[TMP29:%.*]] = xor <vscale x 16 x i1> [[TMP28]], splat (i1 true)
+; MAX2-NEXT: [[TMP30:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP29]])
+; MAX2-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
+; MAX2-NEXT: [[TMP32:%.*]] = or i1 [[TMP30]], [[TMP31]]
+; MAX2-NEXT: br i1 [[TMP32]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; MAX2: [[MIDDLE_SPLIT]]:
+; MAX2-NEXT: br i1 [[TMP30]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; MAX2: [[MIDDLE_BLOCK]]:
+; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]]
+; MAX2: [[VECTOR_EARLY_EXIT]]:
+; MAX2-NEXT: br label %[[LOOP_END]]
+; MAX2: [[SCALAR_PH]]:
+; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ]
+; MAX2-NEXT: br label %[[LOOP:.*]]
+; MAX2: [[LOOP]]:
+; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]]
+; MAX2: [[LOOP_INC]]:
+; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP13:![0-9]+]]
+; MAX2: [[LOOP_END]]:
+; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ]
+; MAX2-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx2, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i8 @same_exit_block_no_live_outs_faulting_load_after_early_exit(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i8 @same_exit_block_no_live_outs_faulting_load_after_early_exit(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ 1, %[[LOOP]] ], [ [[LD2]], %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i8 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx2, align 1
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i8 [ 1, %loop ], [ %ld2, %loop.inc ]
+  ret i8 %retval
+}
+
+
+attributes #0 = { "target-features"="+sve" vscale_range(1,16) }
+attributes #1 = { "target-features"="+sve" }
+;.
+; MAX1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; MAX1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; MAX1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; MAX1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; MAX1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; MAX1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+;.
+; MAX2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; MAX2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; MAX2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; MAX2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; MAX2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; MAX2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; MAX2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; MAX2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
+; MAX2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; MAX2: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
+; MAX2: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; MAX2: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
+; MAX2: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; MAX2: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
new file mode 100644
index 0000000000000..d2e3cc9cdb018
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
@@ -0,0 +1,471 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -passes=loop-vectorize \
+; RUN:   -scalable-vectorization=on -max-num-faulting-pointers=1 -mtriple riscv64-linux-gnu \
+; RUN:   -mattr=+v,+f | FileCheck %s
+
+target triple = "riscv64"
+
+declare void @init_mem(ptr, i64);
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 1
+; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  %p1 = alloca [42 x i8]
+  %p2 = alloca [42 x i8]
+  call void @init_mem(ptr %p1, i64 1024)
+  call void @init_mem(ptr %p2, i64 1024)
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
+; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i5
+; CHECK-NEXT: [[TMP1:%.*]] = add i5 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = zext i5 [[TMP1]] to i64
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP1]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 32
+; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END1:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: br label [[LOOP_END1]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ], [ 3, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END1]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END1]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[LOOP_END]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[STRIDE2:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[STRIDE2]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %stride2 = mul i64 %index, 2
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %stride2
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(ptr %p1, i64 %stride) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[INDEX]], [[STRIDE]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[MUL]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %mul = mul i64 %index, %stride
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %mul
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i5
+; CHECK-NEXT: [[TMP1:%.*]] = add i5 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = zext i5 [[TMP1]] to i64
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 32
+; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END1:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: br label [[LOOP_END1]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ], [ 3, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END1]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END1]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[LOOP_END]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i32, ptr %arrayidx, align 4
+  %cmp3 = icmp eq i32 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx2, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+attributes #0 = { vscale_range(2,1024) }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
index c68eeac19c9ec..69477d6256491 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S < %s -p loop-vectorize | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=1 | FileCheck %s
 
 declare void @init_mem(ptr, i64);
 
@@ -141,3 +141,43 @@ loop.end:
   %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
   ret i64 %retval
 }
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
+; CHECK-SAME: ptr [[P1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}