diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 167aff737d3fd..c36feb0e2fdef 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -230,7 +230,7 @@ Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) { if (hasScalarValue(Def, Lane)) return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)]; - if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) && + if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) && hasScalarValue(Def, VPLane::getFirstLane())) { return Data.VPV2Scalars[Def][0]; } @@ -303,17 +303,17 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { return ScalarValue; } - bool IsUniform = vputils::isUniformAfterVectorization(Def); + bool IsSingleScalar = vputils::isSingleScalar(Def); - VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1); + VPLane LastLane(IsSingleScalar ? 0 : VF.getKnownMinValue() - 1); // Check if there is a scalar value for the selected lane. if (!hasScalarValue(Def, LastLane)) { // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and - // VPExpandSCEVRecipes can also be uniform. + // VPExpandSCEVRecipes can also be a single scalar. assert((isa(Def->getDefiningRecipe())) && "unexpected recipe found to be invariant"); - IsUniform = true; + IsSingleScalar = true; LastLane = 0; } @@ -334,7 +334,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { // resulting vectors are stored in State, we will only generate the // insertelements once. Value *VectorValue = nullptr; - if (IsUniform) { + if (IsSingleScalar) { VectorValue = GetBroadcastInstrs(ScalarValue); set(Def, VectorValue); } else { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5fd7a369bf735..24903d09c5746 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2553,20 +2553,21 @@ class VPReductionEVLRecipe : public VPReductionRecipe { /// VPReplicateRecipe replicates a given instruction producing multiple scalar /// copies of the original scalar type, one per lane, instead of producing a /// single copy of widened type for all lanes. If the instruction is known to be -/// uniform only one copy, per lane zero, will be generated. +/// a single scalar, only one copy, per lane zero, will be generated. class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { /// Indicator if only a single replica per lane is needed. - bool IsUniform; + bool IsSingleScalar; /// Indicator if the replicas are also predicated. bool IsPredicated; public: VPReplicateRecipe(Instruction *I, ArrayRef Operands, - bool IsUniform, VPValue *Mask = nullptr, + bool IsSingleScalar, VPValue *Mask = nullptr, VPIRMetadata Metadata = {}) : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), - VPIRMetadata(Metadata), IsUniform(IsUniform), IsPredicated(Mask) { + VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), + IsPredicated(Mask) { if (Mask) addOperand(Mask); } @@ -2575,7 +2576,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { VPReplicateRecipe *clone() override { auto *Copy = - new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform, + new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar, isPredicated() ? getMask() : nullptr, *this); Copy->transferFlags(*this); return Copy; @@ -2598,7 +2599,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { VPSlotTracker &SlotTracker) const override; #endif - bool isUniform() const { return IsUniform; } + bool isSingleScalar() const { return IsSingleScalar; } bool isPredicated() const { return IsPredicated; } @@ -2606,7 +2607,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { bool onlyFirstLaneUsed(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); - return isUniform(); + return isSingleScalar(); } /// Returns true if the recipe uses scalars of operand \p Op. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 3c7ab7d24bf6d..6b80ecc3a6075 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1190,7 +1190,7 @@ void VPIRPhi::execute(VPTransformState &State) { PHINode *Phi = &getIRPhi(); for (const auto &[Idx, Op] : enumerate(operands())) { VPValue *ExitValue = Op; - auto Lane = vputils::isUniformAfterVectorization(ExitValue) + auto Lane = vputils::isSingleScalar(ExitValue) ? VPLane::getFirstLane() : VPLane::getLastLaneForVF(State.VF); VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; @@ -2624,7 +2624,7 @@ static void scalarizeInstruction(const Instruction *Instr, for (const auto &I : enumerate(RepRecipe->operands())) { auto InputLane = Lane; VPValue *Operand = I.value(); - if (vputils::isUniformAfterVectorization(Operand)) + if (vputils::isSingleScalar(Operand)) InputLane = VPLane::getFirstLane(); Cloned->setOperand(I.index(), State.get(Operand, InputLane)); } @@ -2650,7 +2650,7 @@ static void scalarizeInstruction(const Instruction *Instr, void VPReplicateRecipe::execute(VPTransformState &State) { Instruction *UI = getUnderlyingInstr(); if (State.Lane) { // Generate a single instance. - assert((State.VF.isScalar() || !isUniform()) && + assert((State.VF.isScalar() || !isSingleScalar()) && "uniform recipe shouldn't be predicated"); assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); scalarizeInstruction(UI, this, *State.Lane, State); @@ -2668,7 +2668,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { return; } - if (IsUniform) { + if (IsSingleScalar) { // Uniform within VL means we need to generate lane 0. scalarizeInstruction(UI, this, VPLane(0), State); return; @@ -2676,8 +2676,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { // A store of a loop varying value to a uniform address only needs the last // copy of the store. - if (isa(UI) && - vputils::isUniformAfterVectorization(getOperand(1))) { + if (isa(UI) && vputils::isSingleScalar(getOperand(1))) { auto Lane = VPLane::getLastLaneForVF(State.VF); scalarizeInstruction(UI, this, VPLane(Lane), State); return; @@ -2738,7 +2737,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, UI->getOpcode(), ResultTy, CostKind, {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, Op2Info, Operands, UI, &Ctx.TLI) * - (isUniform() ? 1 : VF.getKnownMinValue()); + (isSingleScalar() ? 1 : VF.getKnownMinValue()); } } @@ -2748,7 +2747,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); + O << Indent << (IsSingleScalar ? "CLONE " : "REPLICATE "); if (!getUnderlyingInstr()->getType()->isVoidTy()) { printAsOperand(O, SlotTracker); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 806c20ef8cf73..20df9bf37d4f3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -151,7 +151,7 @@ static bool sinkScalarOperands(VPlan &Plan) { SinkCandidate->mayReadOrWriteMemory()) continue; if (auto *RepR = dyn_cast(SinkCandidate)) { - if (!ScalarVFOnly && RepR->isUniform()) + if (!ScalarVFOnly && RepR->isSingleScalar()) continue; } else if (!isa(SinkCandidate)) continue; @@ -347,7 +347,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, auto *RecipeWithoutMask = new VPReplicateRecipe( PredRecipe->getUnderlyingInstr(), make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())), - PredRecipe->isUniform(), nullptr /*Mask*/, *PredRecipe); + PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe); auto *Pred = Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); @@ -643,12 +643,11 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { // Skip recipes that shouldn't be narrowed. if (!Def || !isa(Def) || Def->getNumUsers() == 0 || !Def->getUnderlyingValue() || - (RepR && (RepR->isUniform() || RepR->isPredicated()))) + (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))) continue; // Skip recipes that may have other lanes than their first used. - if (!vputils::isUniformAfterVectorization(Def) && - !vputils::onlyFirstLaneUsed(Def)) + if (!vputils::isSingleScalar(Def) && !vputils::onlyFirstLaneUsed(Def)) continue; auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(), diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 82b2ed242b0cb..6438c5437b7e3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -109,7 +109,7 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { // VPReplicateRecipe.IsUniform. They are also uniform across UF parts if // all their operands are invariant. // TODO: Further relax the restrictions. - return R->isUniform() && + return R->isSingleScalar() && (isa(R->getUnderlyingValue())) && all_of(R->operands(), isUniformAcrossVFsAndUFs); }) diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 67329a6d6953c..28c1a6af2570b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -37,8 +37,9 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, /// SCEV expression could be constructed. const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE); -/// Returns true if \p VPV is uniform after vectorization. -inline bool isUniformAfterVectorization(const VPValue *VPV) { +/// Returns true if \p VPV is a single scalar, either because it produces the +/// same value for all lanes or only has its first lane used. +inline bool isSingleScalar(const VPValue *VPV) { auto PreservesUniformity = [](unsigned Opcode) -> bool { if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode)) return true; @@ -65,21 +66,19 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) { // lanes. if (RegionOfR && RegionOfR->isReplicator()) return false; - return Rep->isUniform() || - (PreservesUniformity(Rep->getOpcode()) && - all_of(Rep->operands(), isUniformAfterVectorization)); + return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) && + all_of(Rep->operands(), isSingleScalar)); } if (isa(VPV)) - return all_of(VPV->getDefiningRecipe()->operands(), - isUniformAfterVectorization); + return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar); if (auto *WidenR = dyn_cast(VPV)) { return PreservesUniformity(WidenR->getOpcode()) && - all_of(WidenR->operands(), isUniformAfterVectorization); + all_of(WidenR->operands(), isSingleScalar); } if (auto *VPI = dyn_cast(VPV)) return VPI->isSingleScalar() || VPI->isVectorToScalar() || (PreservesUniformity(VPI->getOpcode()) && - all_of(VPI->operands(), isUniformAfterVectorization)); + all_of(VPI->operands(), isSingleScalar)); // VPExpandSCEVRecipes must be placed in the entry and are alway uniform. return isa(VPV);