@@ -400,6 +400,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
400400 cl::desc(
401401 " Enable vectorization of early exit loops with uncountable exits." ));
402402
// Hidden command-line knob (default 1) bounding how many potentially faulting
// pointers are tolerated when vectorizing a loop with an uncountable exit.
// processLoop compares it against the number of potentially faulting loads
// reported by the legality analysis and gives up when the limit is exceeded.
403+ static cl::opt<unsigned > MaxNumPotentiallyFaultingPointers (
404+ " max-num-faulting-pointers" , cl::init(1 ), cl::Hidden,
405+ cl::desc(
406+ " The maximum number of potentially faulting pointers we permit when "
407+ " vectorizing loops with uncountable exits." ));
408+
403409// Likelihood of bypassing the vectorized loop because assumptions about SCEV
404410// variables not overflowing do not hold. See `emitSCEVChecks`.
405411static constexpr uint32_t SCEVCheckBypassWeights[] = {1 , 127 };
@@ -1585,6 +1591,22 @@ class LoopVectorizationCostModel {
15851591 ElementCount MaxSafeVF,
15861592 bool FoldTailByMasking);
15871593
1594+ bool isSafeForAnyVectorWidth () const {
1595+ return Legal->isSafeForAnyVectorWidth () &&
1596+ (!Legal->hasUncountableEarlyExit () ||
1597+ !Legal->getNumPotentiallyFaultingLoads ());
1598+ }
1599+
1600+ uint64_t getMaxSafeVectorWidthInBits () const {
1601+ uint64_t MaxSafeVectorWidth = Legal->getMaxSafeVectorWidthInBits ();
1602+ // The legalizer bails out if getMinPageSize does not return a value.
1603+ if (Legal->hasUncountableEarlyExit () &&
1604+ Legal->getNumPotentiallyFaultingLoads ())
1605+ MaxSafeVectorWidth =
1606+ std::min (MaxSafeVectorWidth, uint64_t (*TTI.getMinPageSize ()) * 8 );
1607+ return MaxSafeVectorWidth;
1608+ }
1609+
15881610 // / Checks if scalable vectorization is supported and enabled. Caches the
15891611 // / result to avoid repeated debug dumps for repeated queries.
15901612 bool isScalableVectorizationAllowed ();
@@ -2133,6 +2155,41 @@ class GeneratedRTChecks {
21332155};
21342156} // namespace
21352157
2158+ std::optional<unsigned > getMaxVScale (const Function &F,
2159+ const TargetTransformInfo &TTI) {
2160+ if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2161+ return MaxVScale;
2162+
2163+ if (F.hasFnAttribute (Attribute::VScaleRange))
2164+ return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2165+
2166+ return std::nullopt ;
2167+ }
2168+
2169+ static void addPointerAlignmentChecks (
2170+ const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *Loads,
2171+ Function *F, PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI,
2172+ ElementCount VF) {
2173+ ScalarEvolution *SE = PSE.getSE ();
2174+ const DataLayout &DL = SE->getDataLayout ();
2175+ Type *PtrIntType = DL.getIntPtrType (SE->getContext ());
2176+
2177+ const SCEV *Zero = SE->getZero (PtrIntType);
2178+ const SCEV *ScevEC = SE->getElementCount (PtrIntType, VF);
2179+
2180+ for (auto Load : *Loads) {
2181+ APInt EltSize (
2182+ DL.getIndexTypeSizeInBits (Load.first ->getPointerOperandType ()),
2183+ DL.getTypeStoreSize (Load.first ->getType ()).getFixedValue ());
2184+ const SCEV *Start = SE->getPtrToIntExpr (Load.second , PtrIntType);
2185+ const SCEV *Align =
2186+ SE->getMulExpr (ScevEC, SE->getConstant (EltSize),
2187+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
2188+ const SCEV *Rem = SE->getURemExpr (Start, Align);
2189+ PSE.addPredicate (*(SE->getEqualPredicate (Rem, Zero)));
2190+ }
2191+ }
2192+
21362193static bool useActiveLaneMask (TailFoldingStyle Style) {
21372194 return Style == TailFoldingStyle::Data ||
21382195 Style == TailFoldingStyle::DataAndControlFlow ||
@@ -2302,17 +2359,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
23022359 llvm_unreachable (" invalid enum" );
23032360}
23042361
2305- std::optional<unsigned > getMaxVScale (const Function &F,
2306- const TargetTransformInfo &TTI) {
2307- if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2308- return MaxVScale;
2309-
2310- if (F.hasFnAttribute (Attribute::VScaleRange))
2311- return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2312-
2313- return std::nullopt ;
2314- }
2315-
23162362// / For the given VF and UF and maximum trip count computed for the loop, return
23172363// / whether the induction variable might overflow in the vectorized loop. If not,
23182364// / then we know a runtime overflow check always evaluates to false and can be
@@ -3796,13 +3842,22 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
37963842 return false ;
37973843 }
37983844
3799- if (!Legal-> isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3845+ if (!isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
38003846 reportVectorizationInfo (" The target does not provide maximum vscale value "
38013847 " for safe distance analysis." ,
38023848 " ScalableVFUnfeasible" , ORE, TheLoop);
38033849 return false ;
38043850 }
38053851
3852+ if (Legal->hasUncountableEarlyExit () &&
3853+ Legal->getNumPotentiallyFaultingLoads () &&
3854+ !TTI.isVScaleKnownToBeAPowerOfTwo ()) {
3855+ reportVectorizationInfo (" Cannot vectorize potentially faulting early exit "
3856+ " loop with scalable vectors." ,
3857+ " ScalableVFUnfeasible" , ORE, TheLoop);
3858+ return false ;
3859+ }
3860+
38063861 IsScalableVectorizationAllowed = true ;
38073862 return true ;
38083863}
@@ -3814,7 +3869,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
38143869
38153870 auto MaxScalableVF = ElementCount::getScalable (
38163871 std::numeric_limits<ElementCount::ScalarTy>::max ());
3817- if (Legal-> isSafeForAnyVectorWidth ())
3872+ if (isSafeForAnyVectorWidth ())
38183873 return MaxScalableVF;
38193874
38203875 std::optional<unsigned > MaxVScale = getMaxVScale (*TheFunction, TTI);
@@ -3841,11 +3896,11 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
38413896 // the memory accesses that is most restrictive (involved in the smallest
38423897 // dependence distance).
38433898 unsigned MaxSafeElements =
3844- llvm::bit_floor (Legal-> getMaxSafeVectorWidthInBits () / WidestType);
3899+ llvm::bit_floor (getMaxSafeVectorWidthInBits () / WidestType);
38453900
38463901 auto MaxSafeFixedVF = ElementCount::getFixed (MaxSafeElements);
38473902 auto MaxSafeScalableVF = getMaxLegalScalableVF (MaxSafeElements);
3848- if (!Legal-> isSafeForAnyVectorWidth ())
3903+ if (!isSafeForAnyVectorWidth ())
38493904 this ->MaxSafeElements = MaxSafeElements;
38503905
38513906 LLVM_DEBUG (dbgs () << " LV: The max safe fixed VF is: " << MaxSafeFixedVF
@@ -10380,11 +10435,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1038010435 return false ;
1038110436 }
1038210437
10383- if (LVL.hasUncountableEarlyExit () && !EnableEarlyExitVectorization) {
10384- reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10385- " early exit is not enabled" ,
10386- " UncountableEarlyExitLoopsDisabled" , ORE, L);
10387- return false ;
10438+ if (LVL.hasUncountableEarlyExit ()) {
10439+ if (!EnableEarlyExitVectorization) {
10440+ reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10441+ " early exit is not enabled" ,
10442+ " UncountableEarlyExitLoopsDisabled" , ORE, L);
10443+ return false ;
10444+ }
10445+
10446+ unsigned NumPotentiallyFaultingPointers =
10447+ LVL.getNumPotentiallyFaultingLoads ();
10448+ if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
10449+ reportVectorizationFailure (" Not worth vectorizing loop with uncountable "
10450+ " early exit, due to number of potentially "
10451+ " faulting loads" ,
10452+ " UncountableEarlyExitMayFault" , ORE, L);
10453+ return false ;
10454+ } else if (NumPotentiallyFaultingPointers)
10455+ LLVM_DEBUG (dbgs () << " LV: Need to version early-exit vector loop with "
10456+ << " pointer alignment checks.\n " );
1038810457 }
1038910458
1039010459 if (LVL.hasStructVectorCall ()) {
@@ -10542,8 +10611,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1054210611 unsigned SelectedIC = std::max (IC, UserIC);
1054310612 // Optimistically generate runtime checks if they are needed. Drop them if
1054410613 // they turn out to not be profitable.
10545- if (VF.Width .isVector () || SelectedIC > 1 )
10614+ if (VF.Width .isVector () || SelectedIC > 1 ) {
10615+ if (LVL.getNumPotentiallyFaultingLoads ()) {
10616+ assert (SelectedIC == 1 &&
10617+ " Interleaving not supported for early exit loops and "
10618+ " potentially faulting loads" );
10619+ assert (!CM.foldTailWithEVL () &&
10620+ " Explicit vector length unsupported for early exit loops and "
10621+ " potentially faulting loads" );
10622+ addPointerAlignmentChecks (LVL.getPotentiallyFaultingLoads (), F, PSE,
10623+ TTI, VF.Width );
10624+ }
1054610625 Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC);
10626+ }
1054710627
1054810628 // Check if it is profitable to vectorize with runtime checks.
1054910629 bool ForceVectorization =
0 commit comments