@@ -402,7 +402,7 @@ static cl::opt<bool> EnableEarlyExitVectorization(
402
402
" Enable vectorization of early exit loops with uncountable exits." ));
403
403
404
404
// Hidden command-line knob `max-num-faulting-pointers` (unsigned).
// The value is read in LoopVectorizePass::processLoop, where a loop is
// reported as not worth vectorizing when its number of potentially faulting
// pointers is strictly greater than this limit.
// This change lowers the default from 1 to 0, so with the default any loop
// that has at least one potentially faulting pointer exceeds the limit.
// NOTE(review): the leading space inside the option-name and description
// literals looks like an extraction artifact of this diff rendering — confirm
// against the real file before relying on the exact spelling.
static cl::opt<unsigned > MaxNumPotentiallyFaultingPointers (
405
- " max-num-faulting-pointers" , cl::init(1 ), cl::Hidden,
405
+ " max-num-faulting-pointers" , cl::init(0 ), cl::Hidden,
406
406
cl::desc(
407
407
" The maximum number of potentially faulting pointers we permit when "
408
408
" vectorizing loops with uncountable exits." ));
@@ -1621,22 +1621,6 @@ class LoopVectorizationCostModel {
1621
1621
ElementCount MaxSafeVF,
1622
1622
bool FoldTailByMasking);
1623
1623
1624
- bool isSafeForAnyVectorWidth () const {
1625
- return Legal->isSafeForAnyVectorWidth () &&
1626
- (!Legal->hasUncountableEarlyExit () ||
1627
- !Legal->getNumPotentiallyFaultingLoads ());
1628
- }
1629
-
1630
- uint64_t getMaxSafeVectorWidthInBits () const {
1631
- uint64_t MaxSafeVectorWidth = Legal->getMaxSafeVectorWidthInBits ();
1632
- // The legalizer bails out if getMinPageSize does not return a value.
1633
- if (Legal->hasUncountableEarlyExit () &&
1634
- Legal->getNumPotentiallyFaultingLoads ())
1635
- MaxSafeVectorWidth =
1636
- std::min (MaxSafeVectorWidth, uint64_t (*TTI.getMinPageSize ()) * 8 );
1637
- return MaxSafeVectorWidth;
1638
- }
1639
-
1640
1624
/// Checks if scalable vectorization is supported and enabled. Caches the
1641
1625
/// result to avoid repeated debug dumps for repeated queries.
1642
1626
bool isScalableVectorizationAllowed ();
@@ -2185,38 +2169,24 @@ class GeneratedRTChecks {
2185
2169
};
2186
2170
} // namespace
2187
2171
2188
- std::optional<unsigned > getMaxVScale (const Function &F,
2189
- const TargetTransformInfo &TTI) {
2190
- if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2191
- return MaxVScale;
2192
-
2193
- if (F.hasFnAttribute (Attribute::VScaleRange))
2194
- return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2195
-
2196
- return std::nullopt;
2197
- }
2198
-
2199
2172
/// Adds one equality predicate to \p PSE for every entry of the incoming
/// list, requiring the entry's start address to be a multiple of a computed
/// byte span.
///
/// Added (+) version: each entry of \p Ptrs is a (pointer SCEV, accessed type)
/// pair. Per entry, the integer type is taken from the pointer's own type via
/// DataLayout::getIntPtrType, the start address is the ptrtoint of the pointer
/// SCEV, and the span is (element count of VF * IC) multiplied by the store
/// size of the accessed type. The predicate added is
/// `Start urem Span == 0`.
///
/// Removed (-) version: entries were (LoadInst *, pointer SCEV) pairs, a
/// single integer pointer type was derived from the context, the element size
/// came from the load's result type, and the span was scaled by VF only (no
/// interleave count parameter).
///
/// \p F and \p TTI are not referenced anywhere in the visible body.
/// NOTE(review): the multiply is tagged NSW | NUW with no visible check that
/// the product cannot wrap — confirm this holds for all VF * IC and element
/// sizes that can reach this function.
static void addPointerAlignmentChecks (
2200
- const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *Loads ,
2201
- Function *F, PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI,
2202
- ElementCount VF ) {
2173
+ const SmallVectorImpl<std::pair<const SCEV *, Type *>> *Ptrs, Function *F ,
2174
+ PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF ,
2175
+ unsigned IC ) {
2203
2176
ScalarEvolution *SE = PSE.getSE ();
2204
2177
const DataLayout &DL = SE->getDataLayout ();
2205
- Type *PtrIntType = DL.getIntPtrType (SE->getContext ());
2206
2178
2207
- const SCEV *Zero = SE->getZero (PtrIntType);
2208
- const SCEV *ScevEC = SE->getElementCount (PtrIntType, VF);
2209
-
2210
- for (auto Load : *Loads) {
2211
- APInt EltSize (
2212
- DL.getIndexTypeSizeInBits (Load.first ->getPointerOperandType ()),
2213
- DL.getTypeStoreSize (Load.first ->getType ()).getFixedValue ());
2214
- const SCEV *Start = SE->getPtrToIntExpr (Load.second , PtrIntType);
2179
+ for (auto Ptr : *Ptrs) {
2180
+ Type *PtrIntType = DL.getIntPtrType (Ptr .first ->getType ());
2181
+ APInt EltSize (PtrIntType->getScalarSizeInBits (),
2182
+ DL.getTypeStoreSize (Ptr .second ).getFixedValue ());
2183
+ const SCEV *Start = SE->getPtrToIntExpr (Ptr .first , PtrIntType);
2184
+ const SCEV *ScevEC = SE->getElementCount (PtrIntType, VF * IC);
2215
2185
const SCEV *Align =
2216
2186
SE->getMulExpr (ScevEC, SE->getConstant (EltSize),
2217
2187
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
2218
2188
const SCEV *Rem = SE->getURemExpr (Start, Align);
2219
- PSE.addPredicate (*(SE->getEqualPredicate (Rem, Zero )));
2189
+ PSE.addPredicate (*(SE->getEqualPredicate (Rem, SE-> getZero (PtrIntType) )));
2220
2190
}
2221
2191
}
2222
2192
@@ -2389,6 +2359,17 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
2389
2359
llvm_unreachable (" invalid enum" );
2390
2360
}
2391
2361
2362
/// Returns the maximum vscale value to assume for \p F, if one is known.
/// The target's answer (TTI.getMaxVScale()) takes priority; when the target
/// gives none, the upper bound of the function's VScaleRange attribute is
/// used if that attribute is present. Yields std::nullopt when neither source
/// is available (getVScaleRangeMax() may itself return an empty optional —
/// TODO confirm against the Attribute API).
/// This hunk only relocates the definition further down the file; the body
/// matches the removed copy shown earlier in the diff.
+ std::optional<unsigned > getMaxVScale (const Function &F,
2363
+ const TargetTransformInfo &TTI) {
2364
+ if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2365
+ return MaxVScale;
2366
+
2367
+ if (F.hasFnAttribute (Attribute::VScaleRange))
2368
+ return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2369
+
2370
+ return std::nullopt;
2371
+ }
2372
+
2392
2373
/// For the given VF and UF and maximum trip count computed for the loop, return
2393
2374
/// whether the induction variable might overflow in the vectorized loop. If not,
2394
2375
/// then we know a runtime overflow check always evaluates to false and can be
@@ -3881,15 +3862,15 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
3881
3862
return false ;
3882
3863
}
3883
3864
3884
- if (!isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3865
+ if (!Legal-> isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3885
3866
reportVectorizationInfo (" The target does not provide maximum vscale value "
3886
3867
" for safe distance analysis." ,
3887
3868
" ScalableVFUnfeasible" , ORE, TheLoop);
3888
3869
return false ;
3889
3870
}
3890
3871
3891
3872
if (Legal->hasUncountableEarlyExit () &&
3892
- Legal->getNumPotentiallyFaultingLoads () &&
3873
+ Legal->getNumPotentiallyFaultingPointers () &&
3893
3874
!TTI.isVScaleKnownToBeAPowerOfTwo ()) {
3894
3875
reportVectorizationInfo (" Cannot vectorize potentially faulting early exit "
3895
3876
" loop with scalable vectors." ,
@@ -3908,7 +3889,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
3908
3889
3909
3890
auto MaxScalableVF = ElementCount::getScalable (
3910
3891
std::numeric_limits<ElementCount::ScalarTy>::max ());
3911
- if (isSafeForAnyVectorWidth ())
3892
+ if (Legal-> isSafeForAnyVectorWidth ())
3912
3893
return MaxScalableVF;
3913
3894
3914
3895
std::optional<unsigned > MaxVScale = getMaxVScale (*TheFunction, TTI);
@@ -3935,11 +3916,11 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
3935
3916
// the memory accesses that is most restrictive (involved in the smallest
3936
3917
// dependence distance).
3937
3918
unsigned MaxSafeElements =
3938
- llvm::bit_floor (getMaxSafeVectorWidthInBits () / WidestType);
3919
+ llvm::bit_floor (Legal-> getMaxSafeVectorWidthInBits () / WidestType);
3939
3920
3940
3921
auto MaxSafeFixedVF = ElementCount::getFixed (MaxSafeElements);
3941
3922
auto MaxSafeScalableVF = getMaxLegalScalableVF (MaxSafeElements);
3942
- if (!isSafeForAnyVectorWidth ())
3923
+ if (!Legal-> isSafeForAnyVectorWidth ())
3943
3924
this ->MaxSafeElements = MaxSafeElements;
3944
3925
3945
3926
LLVM_DEBUG (dbgs () << " LV: The max safe fixed VF is: " << MaxSafeFixedVF
@@ -10492,7 +10473,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10492
10473
}
10493
10474
10494
10475
unsigned NumPotentiallyFaultingPointers =
10495
- LVL.getNumPotentiallyFaultingLoads ();
10476
+ LVL.getNumPotentiallyFaultingPointers ();
10496
10477
if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
10497
10478
reportVectorizationFailure (" Not worth vectorizing loop with uncountable "
10498
10479
" early exit, due to number of potentially "
@@ -10660,15 +10641,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10660
10641
// Optimistically generate runtime checks if they are needed. Drop them if
10661
10642
// they turn out to not be profitable.
10662
10643
if (VF.Width .isVector () || SelectedIC > 1 ) {
10663
- if (LVL.getNumPotentiallyFaultingLoads ()) {
10664
- assert (SelectedIC == 1 &&
10665
- " Interleaving not supported for early exit loops and "
10666
- " potentially faulting loads" );
10644
+ if (LVL.getNumPotentiallyFaultingPointers ()) {
10667
10645
assert (!CM.foldTailWithEVL () &&
10668
10646
" Explicit vector length unsupported for early exit loops and "
10669
10647
" potentially faulting loads" );
10670
- addPointerAlignmentChecks (LVL.getPotentiallyFaultingLoads (), F, PSE,
10671
- TTI, VF.Width );
10648
+ addPointerAlignmentChecks (LVL.getPotentiallyFaultingPointers (), F, PSE,
10649
+ TTI, VF.Width , SelectedIC );
10672
10650
}
10673
10651
Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC);
10674
10652
}
0 commit comments