@@ -400,6 +400,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
400
400
cl::desc(
401
401
" Enable vectorization of early exit loops with uncountable exits." ));
402
402
403
// Hidden command-line option (default 1). processLoop compares the number of
// potentially faulting loads reported by the legality analysis against this
// value and refuses to vectorize a loop with an uncountable early exit when
// the count is larger.
+ static cl::opt<unsigned > MaxNumPotentiallyFaultingPointers (
404
+ " max-num-faulting-pointers" , cl::init(1 ), cl::Hidden,
405
+ cl::desc(
406
+ " The maximum number of potentially faulting pointers we permit when "
407
+ " vectorizing loops with uncountable exits." ));
408
+
403
409
// Likelihood of bypassing the vectorized loop because assumptions about SCEV
404
410
// variables not overflowing do not hold. See `emitSCEVChecks`.
405
411
static constexpr uint32_t SCEVCheckBypassWeights[] = {1 , 127 };
@@ -1585,6 +1591,22 @@ class LoopVectorizationCostModel {
1585
1591
ElementCount MaxSafeVF,
1586
1592
bool FoldTailByMasking);
1587
1593
1594
+ bool isSafeForAnyVectorWidth () const {
1595
+ return Legal->isSafeForAnyVectorWidth () &&
1596
+ (!Legal->hasUncountableEarlyExit () ||
1597
+ !Legal->getNumPotentiallyFaultingLoads ());
1598
+ }
1599
+
1600
+ uint64_t getMaxSafeVectorWidthInBits () const {
1601
+ uint64_t MaxSafeVectorWidth = Legal->getMaxSafeVectorWidthInBits ();
1602
+ // The legalizer bails out if getMinPageSize does not return a value.
1603
+ if (Legal->hasUncountableEarlyExit () &&
1604
+ Legal->getNumPotentiallyFaultingLoads ())
1605
+ MaxSafeVectorWidth =
1606
+ std::min (MaxSafeVectorWidth, uint64_t (*TTI.getMinPageSize ()) * 8 );
1607
+ return MaxSafeVectorWidth;
1608
+ }
1609
+
1588
1610
// / Checks if scalable vectorization is supported and enabled. Caches the
1589
1611
// / result to avoid repeated debug dumps for repeated queries.
1590
1612
bool isScalableVectorizationAllowed ();
@@ -2133,6 +2155,41 @@ class GeneratedRTChecks {
2133
2155
};
2134
2156
} // namespace
2135
2157
2158
+ std::optional<unsigned > getMaxVScale (const Function &F,
2159
+ const TargetTransformInfo &TTI) {
2160
+ if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2161
+ return MaxVScale;
2162
+
2163
+ if (F.hasFnAttribute (Attribute::VScaleRange))
2164
+ return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2165
+
2166
+ return std::nullopt;
2167
+ }
2168
+
2169
+ static void addPointerAlignmentChecks (
2170
+ const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *Loads,
2171
+ Function *F, PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI,
2172
+ ElementCount VF) {
2173
+ ScalarEvolution *SE = PSE.getSE ();
2174
+ const DataLayout &DL = SE->getDataLayout ();
2175
+ Type *PtrIntType = DL.getIntPtrType (SE->getContext ());
2176
+
2177
+ const SCEV *Zero = SE->getZero (PtrIntType);
2178
+ const SCEV *ScevEC = SE->getElementCount (PtrIntType, VF);
2179
+
2180
+ for (auto Load : *Loads) {
2181
+ APInt EltSize (
2182
+ DL.getIndexTypeSizeInBits (Load.first ->getPointerOperandType ()),
2183
+ DL.getTypeStoreSize (Load.first ->getType ()).getFixedValue ());
2184
+ const SCEV *Start = SE->getPtrToIntExpr (Load.second , PtrIntType);
2185
+ const SCEV *Align =
2186
+ SE->getMulExpr (ScevEC, SE->getConstant (EltSize),
2187
+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
2188
+ const SCEV *Rem = SE->getURemExpr (Start, Align);
2189
+ PSE.addPredicate (*(SE->getEqualPredicate (Rem, Zero)));
2190
+ }
2191
+ }
2192
+
2136
2193
static bool useActiveLaneMask (TailFoldingStyle Style ) {
2137
2194
return Style == TailFoldingStyle::Data ||
2138
2195
Style == TailFoldingStyle::DataAndControlFlow ||
@@ -2302,17 +2359,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
2302
2359
llvm_unreachable (" invalid enum" );
2303
2360
}
2304
2361
2305
- std::optional<unsigned > getMaxVScale (const Function &F,
2306
- const TargetTransformInfo &TTI) {
2307
- if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2308
- return MaxVScale;
2309
-
2310
- if (F.hasFnAttribute (Attribute::VScaleRange))
2311
- return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2312
-
2313
- return std::nullopt;
2314
- }
2315
-
2316
2362
// / For the given VF and UF and maximum trip count computed for the loop, return
2317
2363
// / whether the induction variable might overflow in the vectorized loop. If not,
2318
2364
// / then we know a runtime overflow check always evaluates to false and can be
@@ -3796,13 +3842,22 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
3796
3842
return false ;
3797
3843
}
3798
3844
3799
- if (!Legal-> isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3845
+ if (!isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3800
3846
reportVectorizationInfo (" The target does not provide maximum vscale value "
3801
3847
" for safe distance analysis." ,
3802
3848
" ScalableVFUnfeasible" , ORE, TheLoop);
3803
3849
return false ;
3804
3850
}
3805
3851
3852
+ if (Legal->hasUncountableEarlyExit () &&
3853
+ Legal->getNumPotentiallyFaultingLoads () &&
3854
+ !TTI.isVScaleKnownToBeAPowerOfTwo ()) {
3855
+ reportVectorizationInfo (" Cannot vectorize potentially faulting early exit "
3856
+ " loop with scalable vectors." ,
3857
+ " ScalableVFUnfeasible" , ORE, TheLoop);
3858
+ return false ;
3859
+ }
3860
+
3806
3861
IsScalableVectorizationAllowed = true ;
3807
3862
return true ;
3808
3863
}
@@ -3814,7 +3869,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
3814
3869
3815
3870
auto MaxScalableVF = ElementCount::getScalable (
3816
3871
std::numeric_limits<ElementCount::ScalarTy>::max ());
3817
- if (Legal-> isSafeForAnyVectorWidth ())
3872
+ if (isSafeForAnyVectorWidth ())
3818
3873
return MaxScalableVF;
3819
3874
3820
3875
std::optional<unsigned > MaxVScale = getMaxVScale (*TheFunction, TTI);
@@ -3841,11 +3896,11 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
3841
3896
// the memory accesses that is most restrictive (involved in the smallest
3842
3897
// dependence distance).
3843
3898
unsigned MaxSafeElements =
3844
- llvm::bit_floor (Legal-> getMaxSafeVectorWidthInBits () / WidestType);
3899
+ llvm::bit_floor (getMaxSafeVectorWidthInBits () / WidestType);
3845
3900
3846
3901
auto MaxSafeFixedVF = ElementCount::getFixed (MaxSafeElements);
3847
3902
auto MaxSafeScalableVF = getMaxLegalScalableVF (MaxSafeElements);
3848
- if (!Legal-> isSafeForAnyVectorWidth ())
3903
+ if (!isSafeForAnyVectorWidth ())
3849
3904
this ->MaxSafeElements = MaxSafeElements;
3850
3905
3851
3906
LLVM_DEBUG (dbgs () << " LV: The max safe fixed VF is: " << MaxSafeFixedVF
@@ -10380,11 +10435,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10380
10435
return false ;
10381
10436
}
10382
10437
10383
- if (LVL.hasUncountableEarlyExit () && !EnableEarlyExitVectorization) {
10384
- reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10385
- " early exit is not enabled" ,
10386
- " UncountableEarlyExitLoopsDisabled" , ORE, L);
10387
- return false ;
10438
+ if (LVL.hasUncountableEarlyExit ()) {
10439
+ if (!EnableEarlyExitVectorization) {
10440
+ reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10441
+ " early exit is not enabled" ,
10442
+ " UncountableEarlyExitLoopsDisabled" , ORE, L);
10443
+ return false ;
10444
+ }
10445
+
10446
+ unsigned NumPotentiallyFaultingPointers =
10447
+ LVL.getNumPotentiallyFaultingLoads ();
10448
+ if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
10449
+ reportVectorizationFailure (" Not worth vectorizing loop with uncountable "
10450
+ " early exit, due to number of potentially "
10451
+ " faulting loads" ,
10452
+ " UncountableEarlyExitMayFault" , ORE, L);
10453
+ return false ;
10454
+ } else if (NumPotentiallyFaultingPointers)
10455
+ LLVM_DEBUG (dbgs () << " LV: Need to version early-exit vector loop with "
10456
+ << " pointer alignment checks.\n " );
10388
10457
}
10389
10458
10390
10459
if (LVL.hasStructVectorCall ()) {
@@ -10542,8 +10611,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10542
10611
unsigned SelectedIC = std::max (IC, UserIC);
10543
10612
// Optimistically generate runtime checks if they are needed. Drop them if
10544
10613
// they turn out to not be profitable.
10545
- if (VF.Width .isVector () || SelectedIC > 1 )
10614
+ if (VF.Width .isVector () || SelectedIC > 1 ) {
10615
+ if (LVL.getNumPotentiallyFaultingLoads ()) {
10616
+ assert (SelectedIC == 1 &&
10617
+ " Interleaving not supported for early exit loops and "
10618
+ " potentially faulting loads" );
10619
+ assert (!CM.foldTailWithEVL () &&
10620
+ " Explicit vector length unsupported for early exit loops and "
10621
+ " potentially faulting loads" );
10622
+ addPointerAlignmentChecks (LVL.getPotentiallyFaultingLoads (), F, PSE,
10623
+ TTI, VF.Width );
10624
+ }
10546
10625
Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC);
10626
+ }
10547
10627
10548
10628
// Check if it is profitable to vectorize with runtime checks.
10549
10629
bool ForceVectorization =
0 commit comments