diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f5337b11edc97..0d45c1f509ba5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8170,8 +8170,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
   // builder. At this point we generate the predication tree. There may be
   // duplications since this is a simple recursive scan, but future
   // optimizations will clean it up.
-  // TODO: At the moment the first mask is always skipped, but it would be
-  // better to skip the most expensive mask.
   SmallVector<VPValue *, 2> OperandsWithMask;
 
   for (unsigned In = 0; In < NumIncoming; In++) {
@@ -8184,8 +8182,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
              "Distinct incoming values with one having a full mask");
       break;
     }
-    if (In == 0)
-      continue;
     OperandsWithMask.push_back(EdgeMask);
   }
   return new VPBlendRecipe(Phi, OperandsWithMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 016ad75c21ceb..bdc918f7d993a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2033,12 +2033,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
 class VPBlendRecipe : public VPSingleDefRecipe {
 public:
   /// The blend operation is a User of the incoming values and of their
-  /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
-  /// incoming value does not have a mask associated.
+  /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
+  /// be omitted (implied by passing an odd number of operands) in which case
+  /// all other incoming values are merged into it.
   VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
       : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
-    assert((Operands.size() + 1) % 2 == 0 &&
-           "Expected an odd number of operands");
+    assert(Operands.size() > 0 && "Expected at least one operand!");
   }
 
   VPBlendRecipe *clone() override {
@@ -2048,19 +2048,29 @@ class VPBlendRecipe : public VPSingleDefRecipe {
 
   VP_CLASSOF_IMPL(VPDef::VPBlendSC)
 
-  /// Return the number of incoming values, taking into account that the first
-  /// incoming value has no mask.
-  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
+  /// A normalized blend is one that has an odd number of operands, whereby the
+  /// first operand does not have an associated mask.
+  bool isNormalized() const { return getNumOperands() % 2; }
+
+  /// Return the number of incoming values, taking into account that when
+  /// normalized the first incoming value has no mask.
+  unsigned getNumIncomingValues() const {
+    return (getNumOperands() + isNormalized()) / 2;
+  }
 
   /// Return incoming value number \p Idx.
   VPValue *getIncomingValue(unsigned Idx) const {
-    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
+    // I0 is always operand 0. For Idx > 0 the value is at operand 2 * Idx when
+    // M0 is present and one slot earlier when the blend is normalized.
+    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
   }
 
   /// Return mask number \p Idx.
   VPValue *getMask(unsigned Idx) const {
-    assert(Idx > 0 && "First index has no mask associated.");
-    return getOperand(Idx * 2);
+    assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
+    // M0, when present, is operand 1. For Idx > 0 the mask directly follows
+    // its incoming value: operand 2 * Idx (normalized) or 2 * Idx + 1.
+    return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
   }
 
   /// Generate the phi/select nodes.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 911b2fe9e9a1e..e4d629ef8c27c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1635,6 +1635,7 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 
 void VPBlendRecipe::execute(VPTransformState &State) {
+  assert(isNormalized() && "Expected blend to be normalized!");
   State.setDebugLocFrom(getDebugLoc());
   // We know that all PHIs in non-header blocks are converted into
   // selects, so we don't have to worry about the insertion order and we
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 045f6c356669f..12bf66c5fdd67 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -989,14 +989,48 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
 /// Try to simplify recipe \p R.
 static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   using namespace llvm::VPlanPatternMatch;
-  // Try to remove redundant blend recipes.
+
   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
-    VPValue *Inc0 = Blend->getIncomingValue(0);
+    // Try to remove redundant blend recipes.
+    SmallPtrSet<VPValue *, 4> UniqueValues;
+    if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
+      UniqueValues.insert(Blend->getIncomingValue(0));
     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
-      if (Inc0 != Blend->getIncomingValue(I) &&
-          !match(Blend->getMask(I), m_False()))
-        return;
-    Blend->replaceAllUsesWith(Inc0);
+      if (!match(Blend->getMask(I), m_False()))
+        UniqueValues.insert(Blend->getIncomingValue(I));
+
+    if (UniqueValues.size() == 1) {
+      Blend->replaceAllUsesWith(*UniqueValues.begin());
+      Blend->eraseFromParent();
+      return;
+    }
+
+    if (Blend->isNormalized())
+      return;
+
+    // Normalize the blend so its first incoming value is used as the initial
+    // value with the others blended into it.
+
+    unsigned StartIndex = 0;
+    SmallVector<VPValue *, 4> OperandsWithMask;
+    OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
+
+    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+      if (I == StartIndex)
+        continue;
+      OperandsWithMask.push_back(Blend->getIncomingValue(I));
+      OperandsWithMask.push_back(Blend->getMask(I));
+    }
+
+    auto *NewBlend = new VPBlendRecipe(
+        cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+    NewBlend->insertBefore(&R);
+
+    // The start value's mask is not an operand of the new blend; remember it
+    // so it can be passed to recursivelyDeleteDeadRecipes after the erase.
+    VPValue *DeadMask = Blend->getMask(StartIndex);
+    Blend->replaceAllUsesWith(NewBlend);
     Blend->eraseFromParent();
+    recursivelyDeleteDeadRecipes(DeadMask);
     return;
   }