@@ -545,11 +545,6 @@ class InnerLoopVectorizer {
   // Return true if any runtime check is added.
   bool areSafetyChecksAdded() { return AddedSafetyChecks; }

-  /// A type for vectorized values in the new loop. Each value from the
-  /// original loop, when vectorized, is represented by UF vector values in the
-  /// new unrolled loop, where UF is the unroll factor.
-  using VectorParts = SmallVector<Value *, 2>;
-
   /// A helper function to scalarize a single Instruction in the innermost loop.
   /// Generates a sequence of scalar instances for each lane between \p MinLane
   /// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
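With the VectorParts typedef gone, its last use in this file (the per-part mask cache in the old VPWidenMemoryInstructionRecipe::execute) disappears as well; the split recipes below simply query the mask per unrolled part inside the loop. A minimal sketch of that pattern, not itself part of the patch:

    // Each unrolled part re-queries its mask on demand, so no
    // SmallVector<Value *, 2> of cached per-part values is needed.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Mask = nullptr;
      if (VPValue *VPMask = getMask())
        Mask = State.get(VPMask, Part);
      // ... emit the widened memory operation for this Part ...
    }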
@@ -8086,7 +8081,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
   BlockMaskCache[BB] = BlockMask;
 }

-VPWidenMemoryInstructionRecipe *
+VPWidenMemoryRecipe *
 VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
                                   VFRange &Range) {
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8131,12 +8126,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
-                                              Reverse, I->getDebugLoc());
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+                                 I->getDebugLoc());

   StoreInst *Store = cast<StoreInst>(I);
-  return new VPWidenMemoryInstructionRecipe(
-      *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
+  return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
+                                Reverse, I->getDebugLoc());
 }

 /// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
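The VPWidenLoadRecipe and VPWidenStoreRecipe names come from a class hierarchy declared outside this file (presumably in VPlan.h, which is not shown in this diff). Based on the constructor calls above, and on State.set(this, ...) further down, a sketch of the assumed shape:

    // Assumed sketch, not part of this diff: the base class owns what loads
    // and stores share (ingredient, address, optional mask, access flags).
    class VPWidenMemoryRecipe : public VPRecipeBase {
    protected:
      Instruction &Ingredient; // scalar load/store being widened
      bool Consecutive;        // unit-stride access
      bool Reverse;            // consecutive with decreasing addresses
    };

    // A load defines a value, so the load recipe is itself a VPValue.
    struct VPWidenLoadRecipe : VPWidenMemoryRecipe, VPValue {
      VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                        bool Consecutive, bool Reverse, DebugLoc DL);
      void execute(VPTransformState &State);
    };

    struct VPWidenStoreRecipe : VPWidenMemoryRecipe {
      VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue,
                         VPValue *Mask, bool Consecutive, bool Reverse,
                         DebugLoc DL);
      VPValue *getStoredValue() const; // store-only accessor
      void execute(VPTransformState &State);
    };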
@@ -8775,13 +8770,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
   for (const auto *IG : InterleaveGroups) {
-    auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
-        RecipeBuilder.getRecipe(IG->getInsertPos()));
+    auto *Recipe =
+        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
     SmallVector<VPValue *, 4> StoredValues;
     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
-        auto *StoreR =
-            cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
+        auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
         StoredValues.push_back(StoreR->getStoredValue());
       }
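Note the two different casts here: the insert-position recipe only needs what the base class provides, while the stored operand is fetched through the store subclass. A hypothetical illustration of that split, assuming getAddr()/getMask() live on the base and getStoredValue() only on VPWidenStoreRecipe:

    // Hypothetical helper, for illustration only.
    static void collectStoredValues(VPWidenMemoryRecipe *MemR,
                                    SmallVectorImpl<VPValue *> &StoredValues) {
      VPValue *Addr = MemR->getAddr(); // shared: valid for loads and stores
      (void)Addr;
      if (auto *StoreR = dyn_cast<VPWidenStoreRecipe>(MemR))
        StoredValues.push_back(StoreR->getStoredValue()); // store-specific
    }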
@@ -9368,92 +9362,27 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
   return Call;
 }

-void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-
-  // Attempt to issue a wide load.
-  LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
-  StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
-
-  assert((LI || SI) && "Invalid Load/Store instruction");
-  assert((!SI || StoredValue) && "No stored value provided for widened store");
-  assert((!LI || !StoredValue) && "Stored value provided for widened load");
+void VPWidenLoadRecipe::execute(VPTransformState &State) {
+  auto *LI = cast<LoadInst>(&Ingredient);

   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
-
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGatherScatter = !isConsecutive();
+  bool CreateGather = !isConsecutive();

   auto &Builder = State.Builder;
-  InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
-  bool isMaskRequired = getMask();
-  if (isMaskRequired) {
-    // Mask reversal is only needed for non-all-one (null) masks, as reverse of
-    // a null all-one mask is a null mask.
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Value *Mask = State.get(getMask(), Part);
+  State.setDebugLocFrom(getDebugLoc());
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Value *NewLI;
+    Value *Mask = nullptr;
+    if (auto *VPMask = getMask()) {
+      // Mask reversal is only needed for non-all-one (null) masks, as reverse
+      // of a null all-one mask is a null mask.
+      Mask = State.get(VPMask, Part);
       if (isReverse())
         Mask = Builder.CreateVectorReverse(Mask, "reverse");
-      BlockInMaskParts[Part] = Mask;
-    }
-  }
-
-  // Handle Stores:
-  if (SI) {
-    State.setDebugLocFrom(getDebugLoc());
-
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Instruction *NewSI = nullptr;
-      Value *StoredVal = State.get(StoredValue, Part);
-      // TODO: split this into several classes for better design.
-      if (State.EVL) {
-        assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
-                                "explicit vector length.");
-        assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
-                   VPInstruction::ExplicitVectorLength &&
-               "EVL must be VPInstruction::ExplicitVectorLength.");
-        Value *EVL = State.get(State.EVL, VPIteration(0, 0));
-        // If EVL is not nullptr, then EVL must be a valid value set during plan
-        // creation, possibly default value = whole vector register length. EVL
-        // is created only if TTI prefers predicated vectorization, thus if EVL
-        // is not nullptr it also implies preference for predicated
-        // vectorization.
-        // FIXME: Support reverse store after vp_reverse is added.
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        NewSI = lowerStoreUsingVectorIntrinsics(
-            Builder, State.get(getAddr(), Part, !CreateGatherScatter),
-            StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
-      } else if (CreateGatherScatter) {
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        Value *VectorGep = State.get(getAddr(), Part);
-        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
-                                            MaskPart);
-      } else {
-        if (isReverse()) {
-          // If we store to reverse consecutive memory locations, then we need
-          // to reverse the order of elements in the stored value.
-          StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-          // We don't want to update the value in the map as it might be used in
-          // another expression. So don't call resetVectorValue(StoredVal).
-        }
-        auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-        if (isMaskRequired)
-          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
-                                            BlockInMaskParts[Part]);
-        else
-          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
-      }
-      State.addMetadata(NewSI, SI);
     }
-    return;
-  }

-  // Handle loads.
-  assert(LI && "Must have a load instruction");
-  State.setDebugLocFrom(getDebugLoc());
-  for (unsigned Part = 0; Part < State.UF; ++Part) {
-    Value *NewLI;
     // TODO: split this into several classes for better design.
     if (State.EVL) {
       assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
@@ -9468,22 +9397,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       // is not nullptr it also implies preference for predicated
       // vectorization.
       // FIXME: Support reverse loading after vp_reverse is added.
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
       NewLI = lowerLoadUsingVectorIntrinsics(
-          Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
-          CreateGatherScatter, MaskPart, EVL, Alignment);
-    } else if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+          Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
+          CreateGather, Mask, EVL, Alignment);
+    } else if (CreateGather) {
       Value *VectorGep = State.get(getAddr(), Part);
-      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
+      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, Mask,
                                          nullptr, "wide.masked.gather");
       State.addMetadata(NewLI, LI);
     } else {
       auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-      if (isMaskRequired)
-        NewLI = Builder.CreateMaskedLoad(
-            DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
-            PoisonValue::get(DataTy), "wide.masked.load");
+      if (Mask)
+        NewLI = Builder.CreateMaskedLoad(DataTy, VecPtr, Alignment, Mask,
+                                         PoisonValue::get(DataTy),
+                                         "wide.masked.load");
       else
         NewLI =
             Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
@@ -9494,7 +9421,69 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
     }

-    State.set(getVPSingleValue(), NewLI, Part);
+    State.set(this, NewLI, Part);
+  }
+}
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+  auto *SI = cast<StoreInst>(&Ingredient);
+
+  VPValue *StoredVPValue = getStoredValue();
+  bool CreateScatter = !isConsecutive();
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
+
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Instruction *NewSI = nullptr;
+    Value *Mask = nullptr;
+    if (auto *VPMask = getMask()) {
+      // Mask reversal is only needed for non-all-one (null) masks, as reverse
+      // of a null all-one mask is a null mask.
+      Mask = State.get(VPMask, Part);
+      if (isReverse())
+        Mask = Builder.CreateVectorReverse(Mask, "reverse");
+    }
+
+    Value *StoredVal = State.get(StoredVPValue, Part);
+    if (isReverse()) {
+      assert(!State.EVL && "reversing not yet implemented with EVL");
+      // If we store to reverse consecutive memory locations, then we need
+      // to reverse the order of elements in the stored value.
+      StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+      // We don't want to update the value in the map as it might be used in
+      // another expression. So don't call resetVectorValue(StoredVal).
+    }
+    // TODO: split this into several classes for better design.
+    if (State.EVL) {
+      assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                              "explicit vector length.");
+      assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+                 VPInstruction::ExplicitVectorLength &&
+             "EVL must be VPInstruction::ExplicitVectorLength.");
+      Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+      // If EVL is not nullptr, then EVL must be a valid value set during plan
+      // creation, possibly default value = whole vector register length. EVL
+      // is created only if TTI prefers predicated vectorization, thus if EVL
+      // is not nullptr it also implies preference for predicated
+      // vectorization.
+      // FIXME: Support reverse store after vp_reverse is added.
+      NewSI = lowerStoreUsingVectorIntrinsics(
+          Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
+          CreateScatter, Mask, EVL, Alignment);
+    } else if (CreateScatter) {
+      Value *VectorGep = State.get(getAddr(), Part);
+      NewSI =
+          Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, Mask);
+    } else {
+      auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
+      if (Mask)
+        NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, Mask);
+      else
+        NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+    }
+    State.addMetadata(NewSI, SI);
   }
 }

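Both new execute() implementations preserve the original three lowering strategies: EVL-style vp intrinsics, gather/scatter for non-consecutive accesses, and plain or masked wide operations for consecutive ones. A minimal sketch of the load-side decision, using the same IRBuilder calls as the patch (the helper name is illustrative, not from the commit):

    // Sketch: pick the widened-load form; mirrors the non-EVL paths above.
    static Value *emitWidenedLoad(IRBuilderBase &Builder, Type *DataTy,
                                  Value *Ptr, Value *Mask, bool CreateGather,
                                  Align Alignment) {
      if (CreateGather) // non-consecutive: Ptr is a vector of pointers
        return Builder.CreateMaskedGather(DataTy, Ptr, Alignment, Mask,
                                          /*PassThru=*/nullptr,
                                          "wide.masked.gather");
      if (Mask)         // consecutive but predicated: masked wide load
        return Builder.CreateMaskedLoad(DataTy, Ptr, Alignment, Mask,
                                        PoisonValue::get(DataTy),
                                        "wide.masked.load");
      // consecutive and unconditional: plain wide load
      return Builder.CreateAlignedLoad(DataTy, Ptr, Alignment, "wide.load");
    }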