diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 02b73c3f1dba9..46e55be3f643b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3111,10 +3111,11 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe, /// using the address to load from, the explicit vector length and an optional /// mask. struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { - VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask) + VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, + VPValue *Mask) : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(), - {L.getAddr(), &EVL}, L.isConsecutive(), - L.isReverse(), L, L.getDebugLoc()), + {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L, + L.getDebugLoc()), VPValue(this, &getIngredient()) { setMask(Mask); } @@ -3192,11 +3193,11 @@ struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe { /// using the value to store, the address to store to, the explicit vector /// length and an optional mask. 
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { - VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask) + VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, + VPValue *Mask) : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(), - {S.getAddr(), S.getStoredValue(), &EVL}, - S.isConsecutive(), S.isReverse(), S, - S.getDebugLoc()) { + {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(), + S.isReverse(), S, S.getDebugLoc()) { setMask(Mask); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 45d9ee1753c3a..cff43c2742a6b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2138,6 +2138,8 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, VPRecipeBase &CurRecipe, VPTypeAnalysis &TypeInfo, VPValue &AllOneMask, VPValue &EVL) { + // FIXME: Don't transform recipes to EVL recipes if they're not masked by the + // header mask. auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * { assert(OrigMask && "Unmasked recipe when folding tail"); // HeaderMask will be handled using EVL. @@ -2147,14 +2149,35 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, return HeaderMask == OrigMask ? nullptr : OrigMask; }; + /// Adjust any end pointers so that they point to the end of EVL lanes not VF. 
+ auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * { + auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr); + if (!EndPtr) + return Addr; + assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() && + "VPVectorEndPointerRecipe with non-VF VF operand?"); + assert( + all_of(EndPtr->users(), + [](VPUser *U) { + return cast<VPWidenMemoryRecipe>(U)->isReverse(); + }) && + "VPVectorEndPointRecipe not used by reversed widened memory recipe?"); + VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone(); + EVLAddr->insertBefore(&CurRecipe); + EVLAddr->setOperand(1, &EVL); + return EVLAddr; + }; + return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe) .Case([&](VPWidenLoadRecipe *L) { VPValue *NewMask = GetNewMask(L->getMask()); - return new VPWidenLoadEVLRecipe(*L, EVL, NewMask); + VPValue *NewAddr = GetNewAddr(L->getAddr()); + return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask); }) .Case([&](VPWidenStoreRecipe *S) { VPValue *NewMask = GetNewMask(S->getMask()); - return new VPWidenStoreEVLRecipe(*S, EVL, NewMask); + VPValue *NewAddr = GetNewAddr(S->getAddr()); + return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask); }) .Case([&](VPReductionRecipe *Red) { VPValue *NewMask = GetNewMask(Red->getCondOp()); @@ -2189,7 +2212,9 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { IsaPred) && "User of VF that we can't transform to EVL."); - Plan.getVF().replaceAllUsesWith(&EVL); + Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) { + return isa(U); + }); assert(all_of(Plan.getVFxUF().users(), [&Plan](VPUser *U) {