@@ -442,6 +442,8 @@ static std::optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE,
442
442
namespace {
443
443
// Forward declare GeneratedRTChecks.
444
444
class GeneratedRTChecks ;
445
+
446
+ using SCEV2ValueTy = DenseMap<const SCEV *, Value *>;
445
447
} // namespace
446
448
447
449
namespace llvm {
@@ -497,8 +499,10 @@ class InnerLoopVectorizer {
497
499
// / loop and the start value for the canonical induction, if it is != 0. The
498
500
// / latter is the case when vectorizing the epilogue loop. In the case of
499
501
// / epilogue vectorization, this function is overridden to handle the more
500
- // / complex control flow around the loops.
501
- virtual std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton ();
502
+ // / complex control flow around the loops. \p ExpandedSCEVs is used to
503
+ // / look up SCEV expansions for expressions needed during skeleton creation.
504
+ virtual std::pair<BasicBlock *, Value *>
505
+ createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs);
502
506
503
507
// / Fix the vectorized code, taking care of header phi's, live-outs, and more.
504
508
void fixVectorizedLoop (VPTransformState &State, VPlan &Plan);
@@ -555,12 +559,13 @@ class InnerLoopVectorizer {
555
559
556
560
// / Create a new phi node for the induction variable \p OrigPhi to resume
557
561
// / iteration count in the scalar epilogue, from where the vectorized loop
558
- // / left off. In cases where the loop skeleton is more complicated (eg.
559
- // / epilogue vectorization) and the resume values can come from an additional
560
- // / bypass block, the \p AdditionalBypass pair provides information about the
561
- // / bypass block and the end value on the edge from bypass to this loop.
562
+ // / left off. \p Step is the SCEV-expanded induction step to use. In cases
563
+ // / where the loop skeleton is more complicated (i.e., epilogue vectorization)
564
+ // / and the resume values can come from an additional bypass block, the \p
565
+ // / AdditionalBypass pair provides information about the bypass block and the
566
+ // / end value on the edge from bypass to this loop.
562
567
PHINode *createInductionResumeValue (
563
- PHINode *OrigPhi, const InductionDescriptor &ID,
568
+ PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
564
569
ArrayRef<BasicBlock *> BypassBlocks,
565
570
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
566
571
@@ -646,6 +651,7 @@ class InnerLoopVectorizer {
646
651
// / block, the \p AdditionalBypass pair provides information about the bypass
647
652
// / block and the end value on the edge from bypass to this loop.
648
653
void createInductionResumeValues (
654
+ const SCEV2ValueTy &ExpandedSCEVs,
649
655
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
650
656
651
657
// / Complete the loop skeleton by adding debug MDs, creating appropriate
@@ -835,15 +841,18 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
835
841
836
842
// Override this function to handle the more complex control flow around the
837
843
// three loops.
838
- std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton () final {
839
- return createEpilogueVectorizedLoopSkeleton ();
844
+ std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton (
845
+
846
+ const SCEV2ValueTy &ExpandedSCEVs) final {
847
+
848
+ return createEpilogueVectorizedLoopSkeleton (ExpandedSCEVs); // Delegate to the strategy-specific (main or epilogue) implementation, forwarding the SCEV expansion map unchanged.
840
849
}
841
850
842
851
// / The interface for creating a vectorized skeleton using one of two
843
852
// / different strategies, each corresponding to one execution of the vplan
844
853
// / as described above.
845
854
virtual std::pair<BasicBlock *, Value *>
846
- createEpilogueVectorizedLoopSkeleton () = 0;
855
+ createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs ) = 0;
847
856
848
857
// / Holds and updates state information required to vectorize the main loop
849
858
// / and its epilogue in two separate passes. This setup helps us avoid
@@ -871,7 +880,8 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
871
880
EPI, LVL, CM, BFI, PSI, Check) {}
872
881
// / Implements the interface for creating a vectorized skeleton using the
873
882
// / *main loop* strategy (i.e., the first pass of vplan execution).
874
- std::pair<BasicBlock *, Value *> createEpilogueVectorizedLoopSkeleton () final ;
883
+ std::pair<BasicBlock *, Value *>
884
+ createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final ;
875
885
876
886
protected:
877
887
// / Emits an iteration count bypass check once for the main loop (when \p
@@ -901,7 +911,8 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
901
911
}
902
912
// / Implements the interface for creating a vectorized skeleton using the
903
913
// / *epilogue loop* strategy (i.e., the second pass of vplan execution).
904
- std::pair<BasicBlock *, Value *> createEpilogueVectorizedLoopSkeleton () final ;
914
+ std::pair<BasicBlock *, Value *>
915
+ createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final ;
905
916
906
917
protected:
907
918
// / Emits an iteration count bypass check after the main vector loop has
@@ -2424,21 +2435,6 @@ static void buildScalarSteps(Value *ScalarIV, Value *Step,
2424
2435
}
2425
2436
}
2426
2437
2427
- // Generate code for the induction step. Note that induction steps are
2428
- // required to be loop-invariant
2429
- static Value *CreateStepValue (const SCEV *Step, ScalarEvolution &SE,
2430
- Instruction *InsertBefore,
2431
- Loop *OrigLoop = nullptr ) {
2432
- const DataLayout &DL = SE.getDataLayout ();
2433
- assert ((!OrigLoop || SE.isLoopInvariant (Step, OrigLoop)) &&
2434
- " Induction step should be loop invariant" );
2435
- if (auto *E = dyn_cast<SCEVUnknown>(Step))
2436
- return E->getValue ();
2437
-
2438
- SCEVExpander Exp (SE, DL, " induction" );
2439
- return Exp.expandCodeFor (Step, Step->getType (), InsertBefore);
2440
- }
2441
-
2442
2438
// / Compute the transformed value of Index at offset StartValue using step
2443
2439
// / StepValue.
2444
2440
// / For integer induction, returns StartValue + Index * StepValue.
@@ -3142,7 +3138,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
3142
3138
}
3143
3139
3144
3140
PHINode *InnerLoopVectorizer::createInductionResumeValue (
3145
- PHINode *OrigPhi, const InductionDescriptor &II,
3141
+ PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
3146
3142
ArrayRef<BasicBlock *> BypassBlocks,
3147
3143
std::pair<BasicBlock *, Value *> AdditionalBypass) {
3148
3144
Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
@@ -3161,17 +3157,13 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
3161
3157
if (II.getInductionBinOp () && isa<FPMathOperator>(II.getInductionBinOp ()))
3162
3158
B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
3163
3159
3164
- Value *Step =
3165
- CreateStepValue (II.getStep (), *PSE.getSE (), &*B.GetInsertPoint ());
3166
3160
EndValue =
3167
3161
emitTransformedIndex (B, VectorTripCount, II.getStartValue (), Step, II);
3168
3162
EndValue->setName (" ind.end" );
3169
3163
3170
3164
// Compute the end value for the additional bypass (if applicable).
3171
3165
if (AdditionalBypass.first ) {
3172
3166
B.SetInsertPoint (&(*AdditionalBypass.first ->getFirstInsertionPt ()));
3173
- Value *Step =
3174
- CreateStepValue (II.getStep (), *PSE.getSE (), &*B.GetInsertPoint ());
3175
3167
EndValueFromAdditionalBypass = emitTransformedIndex (
3176
3168
B, AdditionalBypass.second , II.getStartValue (), Step, II);
3177
3169
EndValueFromAdditionalBypass->setName (" ind.end" );
@@ -3200,7 +3192,22 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
3200
3192
return BCResumeVal;
3201
3193
}
3202
3194
3195
+ // / Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
3196
+ // / expansion results. If the step of \p ID is a SCEVConstant or a SCEVUnknown, the result of its getValue() is returned directly and \p ExpandedSCEVs is not consulted. Any other step must already have an entry in \p ExpandedSCEVs; this is only checked by an assertion.
3197
+ static Value *getExpandedStep (const InductionDescriptor &ID,
3198
+ const SCEV2ValueTy &ExpandedSCEVs) {
3199
+ const SCEV *Step = ID.getStep ();
3200
+ if (auto *C = dyn_cast<SCEVConstant>(Step)) // Constant step: no map lookup needed.
3201
+ return C->getValue ();
3202
+ if (auto *U = dyn_cast<SCEVUnknown>(Step)) // Unknown step: return the value it holds, no map lookup needed.
3203
+ return U->getValue ();
3204
+ auto I = ExpandedSCEVs.find (Step); // Every other kind of step is looked up in the map, keyed by the SCEV pointer.
3205
+ assert (I != ExpandedSCEVs.end () && " SCEV must be expanded at this point" );
3206
+ return I->second ; // NOTE(review): when asserts are compiled out, a missing entry would dereference the end iterator here; callers must guarantee the step is present.
3207
+ }
3208
+
3203
3209
void InnerLoopVectorizer::createInductionResumeValues (
3210
+ const SCEV2ValueTy &ExpandedSCEVs,
3204
3211
std::pair<BasicBlock *, Value *> AdditionalBypass) {
3205
3212
assert (((AdditionalBypass.first && AdditionalBypass.second ) ||
3206
3213
(!AdditionalBypass.first && !AdditionalBypass.second )) &&
@@ -3216,7 +3223,8 @@ void InnerLoopVectorizer::createInductionResumeValues(
3216
3223
PHINode *OrigPhi = InductionEntry.first ;
3217
3224
const InductionDescriptor &II = InductionEntry.second ;
3218
3225
PHINode *BCResumeVal = createInductionResumeValue (
3219
- OrigPhi, II, LoopBypassBlocks, AdditionalBypass);
3226
+ OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
3227
+ AdditionalBypass);
3220
3228
OrigPhi->setIncomingValueForBlock (LoopScalarPreHeader, BCResumeVal);
3221
3229
}
3222
3230
}
@@ -3257,7 +3265,8 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
3257
3265
}
3258
3266
3259
3267
std::pair<BasicBlock *, Value *>
3260
- InnerLoopVectorizer::createVectorizedLoopSkeleton () {
3268
+ InnerLoopVectorizer::createVectorizedLoopSkeleton (
3269
+ const SCEV2ValueTy &ExpandedSCEVs) {
3261
3270
/*
3262
3271
In this function we generate a new loop. The new loop will contain
3263
3272
the vectorized instructions while the old loop will continue to run the
@@ -3312,7 +3321,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
3312
3321
emitMemRuntimeChecks (LoopScalarPreHeader);
3313
3322
3314
3323
// Emit phis for the new starting index of the scalar loop.
3315
- createInductionResumeValues ();
3324
+ createInductionResumeValues (ExpandedSCEVs );
3316
3325
3317
3326
return {completeLoopSkeleton (), nullptr };
3318
3327
}
@@ -7674,11 +7683,9 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
7674
7683
}
7675
7684
}
7676
7685
7677
- void LoopVectorizationPlanner::executePlan (ElementCount BestVF, unsigned BestUF,
7678
- VPlan &BestVPlan,
7679
- InnerLoopVectorizer &ILV,
7680
- DominatorTree *DT,
7681
- bool IsEpilogueVectorization) {
7686
+ SCEV2ValueTy LoopVectorizationPlanner::executePlan (
7687
+ ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
7688
+ InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization) {
7682
7689
assert (BestVPlan.hasVF (BestVF) &&
7683
7690
" Trying to execute plan with unsupported VF" );
7684
7691
assert (BestVPlan.hasUF (BestUF) &&
@@ -7710,7 +7717,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
7710
7717
// middle block. The vector loop is created during VPlan execution.
7711
7718
Value *CanonicalIVStartValue;
7712
7719
std::tie (State.CFG .PrevBB , CanonicalIVStartValue) =
7713
- ILV.createVectorizedLoopSkeleton ();
7720
+ ILV.createVectorizedLoopSkeleton (State. ExpandedSCEVs );
7714
7721
7715
7722
// Only use noalias metadata when using memory checks guaranteeing no overlap
7716
7723
// across all iterations.
@@ -7778,6 +7785,8 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
7778
7785
ILV.fixVectorizedLoop (State, BestVPlan);
7779
7786
7780
7787
ILV.printDebugTracesAtEnd ();
7788
+
7789
+ return State.ExpandedSCEVs ;
7781
7790
}
7782
7791
7783
7792
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -7799,7 +7808,8 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
7799
7808
// / This function is partially responsible for generating the control flow
7800
7809
// / depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7801
7810
std::pair<BasicBlock *, Value *>
7802
- EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton () {
7811
+ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton (
7812
+ const SCEV2ValueTy &ExpandedSCEVs) {
7803
7813
createVectorLoopSkeleton (" " );
7804
7814
7805
7815
// Generate the code to check the minimum iteration count of the vector
@@ -7917,7 +7927,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
7917
7927
// / This function is partially responsible for generating the control flow
7918
7928
// / depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7919
7929
std::pair<BasicBlock *, Value *>
7920
- EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton () {
7930
+ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton (
7931
+ const SCEV2ValueTy &ExpandedSCEVs) {
7921
7932
createVectorLoopSkeleton (" vec.epilog." );
7922
7933
7923
7934
// Now, compare the remaining count and if there aren't enough iterations to
@@ -8015,7 +8026,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
8015
8026
// check, then the resume value for the induction variable comes from
8016
8027
// the trip count of the main vector loop, hence passing the AdditionalBypass
8017
8028
// argument.
8018
- createInductionResumeValues ({VecEpilogueIterationCountCheck,
8029
+ createInductionResumeValues (ExpandedSCEVs,
8030
+ {VecEpilogueIterationCountCheck,
8019
8031
EPI.VectorTripCount } /* AdditionalBypass */ );
8020
8032
8021
8033
return {completeLoopSkeleton (), EPResumeVal};
@@ -10387,8 +10399,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10387
10399
EPI, &LVL, &CM, BFI, PSI, Checks);
10388
10400
10389
10401
VPlan &BestMainPlan = LVP.getBestPlanFor (EPI.MainLoopVF );
10390
- LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF , BestMainPlan, MainILV ,
10391
- DT, true );
10402
+ auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10403
+ BestMainPlan, MainILV, DT, true );
10392
10404
++LoopsVectorized;
10393
10405
10394
10406
// Second pass vectorizes the epilogue and adjusts the control flow
@@ -10442,7 +10454,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10442
10454
}
10443
10455
10444
10456
ResumeV = MainILV.createInductionResumeValue (
10445
- IndPhi, *ID, {EPI.MainLoopIterationCountCheck });
10457
+ IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
10458
+ {EPI.MainLoopIterationCountCheck });
10446
10459
}
10447
10460
assert (ResumeV && " Must have a resume value" );
10448
10461
VPValue *StartVal = BestEpiPlan.getVPValueOrAddLiveIn (ResumeV);
0 commit comments