From 9a0e4d70829292b1b5604920a849085c80583f6c Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Sun, 15 Dec 2024 23:04:37 -0800 Subject: [PATCH 01/11] [VPlan] Passing non-null instruction when creating VPReductionRecipe in unittest. --- .../Transforms/Vectorize/VPlanTest.cpp | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index f9a85869e3142..da6c490d7af6a 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1165,22 +1165,27 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { } { + auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32), + PoisonValue::get(Int32)); VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); - VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp, + VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp, false); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); EXPECT_FALSE(Recipe.mayReadOrWriteMemory()); + delete Add; } { + auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32), + PoisonValue::get(Int32)); VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); - VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp, + VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp, false); VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 4)); VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp); @@ -1188,6 +1193,7 @@ TEST_F(VPRecipeTest, 
MayHaveSideEffectsAndMayReadWriteMemory) { EXPECT_FALSE(EVLRecipe.mayReadFromMemory()); EXPECT_FALSE(EVLRecipe.mayWriteToMemory()); EXPECT_FALSE(EVLRecipe.mayReadOrWriteMemory()); + delete Add; } { @@ -1529,28 +1535,34 @@ TEST_F(VPRecipeTest, dumpRecipeUnnamedVPValuesNotInPlanOrBlock) { TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) { IntegerType *Int32 = IntegerType::get(C, 32); + auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32), + PoisonValue::get(Int32)); VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); - VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp, + VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp, false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); + delete Add; } TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) { IntegerType *Int32 = IntegerType::get(C, 32); + auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32), + PoisonValue::get(Int32)); VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); - VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp, + VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp, false); VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0)); VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp); EXPECT_TRUE(isa(&EVLRecipe)); VPRecipeBase *BaseR = &EVLRecipe; EXPECT_TRUE(isa(BaseR)); + delete Add; } } // namespace From 087a53345b6a76052225c5a71ac532cc24716ae9 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Tue, 11 Mar 2025 19:27:51 -0700 Subject: [PATCH 02/11] [VPlan] Change parent of VPReductionRecipe to VPRecipeWithIRFlags. 
NFC This patch changes the parent of VPReductionRecipe from VPSingleDefRecipe to VPRecipeWithIRFlags, so that flags are printed, queried and controlled through VPRecipeWithIRFlags. This removes the dependency on the underlying instruction. This patch also adds a new function `setFastMathFlags()` to VPRecipeWithIRFlags, because the reduction chain may contain multiple instructions and the underlying instruction may not carry the corresponding flags for this reduction. --- .../Transforms/Vectorize/LoopVectorize.cpp | 6 ++--- llvm/lib/Transforms/Vectorize/VPlan.h | 27 +++++++++++++------ .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 24 ++++++++++------- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cb860a472d8f7..131f2e245c461 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9799,9 +9799,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (CM.blockNeedsPredicationForAnyReason(BB)) CondOp = RecipeBuilder.getBlockInMask(BB); - auto *RedRecipe = new VPReductionRecipe( - RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp, - CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc()); + auto *RedRecipe = + new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp, + CondOp, CM.useOrderedReductions(RdxDesc)); // Append the recipe to the end of the VPBasicBlock because we need to // ensure that it comes after all of it's inputs, including CondOp. 
// Delete CurrentLink as it will be invalid if its operand is replaced diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index b1288c42b20f2..3f5a074919c8a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -713,6 +713,8 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || R->getVPDefID() == VPRecipeBase::VPWidenCastSC || R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC || + R->getVPDefID() == VPRecipeBase::VPReductionSC || + R->getVPDefID() == VPRecipeBase::VPReductionEVLSC || R->getVPDefID() == VPRecipeBase::VPReplicateSC || R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC || R->getVPDefID() == VPRecipeBase::VPVectorPointerSC; @@ -788,6 +790,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { } } + /// Set fast-math flags for this recipe. + void setFastMathFlags(FastMathFlags FMFs) { + OpType = OperationType::FPMathOp; + this->FMFs = FMFs; + } + CmpInst::Predicate getPredicate() const { assert(OpType == OperationType::Cmp && "recipe doesn't have a compare predicate"); @@ -2286,7 +2294,7 @@ class VPInterleaveRecipe : public VPRecipeBase { /// A recipe to represent inloop reduction operations, performing a reduction on /// a vector operand into a scalar value, and adding the result to a chain. /// The Operands are {ChainOp, VecOp, [Condition]}. -class VPReductionRecipe : public VPSingleDefRecipe { +class VPReductionRecipe : public VPRecipeWithIRFlags { /// The recurrence decriptor for the reduction in question. 
const RecurrenceDescriptor &RdxDesc; bool IsOrdered; @@ -2296,29 +2304,32 @@ class VPReductionRecipe : public VPSingleDefRecipe { protected: VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef Operands, - VPValue *CondOp, bool IsOrdered, DebugLoc DL) - : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R), + VPValue *CondOp, bool IsOrdered) + : VPRecipeWithIRFlags(SC, Operands, *I), RdxDesc(R), IsOrdered(IsOrdered) { if (CondOp) { IsConditional = true; addOperand(CondOp); } + // The inloop reduction may across multiple scalar instruction and the + // underlying instruction may not contains the corresponding flags. Set the + // flags explicit from the redurrence descriptor. + setFastMathFlags(R.getFastMathFlags()); } public: VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, - bool IsOrdered, DebugLoc DL = {}) + bool IsOrdered) : VPReductionRecipe(VPDef::VPReductionSC, R, I, ArrayRef({ChainOp, VecOp}), CondOp, - IsOrdered, DL) {} + IsOrdered) {} ~VPReductionRecipe() override = default; VPReductionRecipe *clone() override { return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(), - getVecOp(), getCondOp(), IsOrdered, - getDebugLoc()); + getVecOp(), getCondOp(), IsOrdered); } static inline bool classof(const VPRecipeBase *R) { @@ -2373,7 +2384,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe { VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(), cast_or_null(R.getUnderlyingValue()), ArrayRef({R.getChainOp(), R.getVecOp(), &EVL}), CondOp, - R.isOrdered(), R.getDebugLoc()) {} + R.isOrdered()) {} ~VPReductionEVLRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index d154d54c37862..0c93023ac742f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2224,7 +2224,8 @@ void 
VPReductionRecipe::execute(VPTransformState &State) { RecurKind Kind = RdxDesc.getRecurrenceKind(); // Propagate the fast-math flags carried by the underlying instruction. IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); - State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); + if (hasFastMathFlags()) + State.Builder.setFastMathFlags(getFastMathFlags()); State.setDebugLocFrom(getDebugLoc()); Value *NewVecOp = State.get(getVecOp()); if (VPValue *Cond = getCondOp()) { @@ -2275,7 +2276,8 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) { // Propagate the fast-math flags carried by the underlying instruction. IRBuilderBase::FastMathFlagGuard FMFGuard(Builder); const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor(); - Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); + if (hasFastMathFlags()) + Builder.setFastMathFlags(getFastMathFlags()); RecurKind Kind = RdxDesc.getRecurrenceKind(); Value *Prev = State.get(getChainOp(), /*IsScalar*/ true); @@ -2312,6 +2314,8 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, Type *ElementTy = Ctx.Types.inferScalarType(this); auto *VectorTy = cast(toVectorTy(ElementTy, VF)); unsigned Opcode = RdxDesc.getOpcode(); + FastMathFlags FMFs = + hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(); // TODO: Support any-of and in-loop reductions. 
assert( @@ -2331,12 +2335,12 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) { Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind); - return Cost + Ctx.TTI.getMinMaxReductionCost( - Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind); + return Cost + + Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind); } - return Cost + Ctx.TTI.getArithmeticReductionCost( - Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind); + return Cost + Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, FMFs, + Ctx.CostKind); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2347,8 +2351,8 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; getChainOp()->printAsOperand(O, SlotTracker); O << " +"; - if (isa(getUnderlyingInstr())) - O << getUnderlyingInstr()->getFastMathFlags(); + if (isa_and_nonnull(getUnderlyingValue())) + printFlags(O); O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); if (isConditional()) { @@ -2369,8 +2373,8 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; getChainOp()->printAsOperand(O, SlotTracker); O << " +"; - if (isa(getUnderlyingInstr())) - O << getUnderlyingInstr()->getFastMathFlags(); + if (isa_and_nonnull(getUnderlyingValue())) + printFlags(O); O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); O << ", "; From 6be4c38c6f0226347b910d2131d562d22819c520 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Tue, 11 Mar 2025 20:00:34 -0700 Subject: [PATCH 03/11] !fixup, formating. 
--- llvm/unittests/Transforms/Vectorize/VPlanTest.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index da6c490d7af6a..ca1e48290f25b 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1540,8 +1540,8 @@ TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) { VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); - VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, - VecOp, false); + VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp, + false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1555,8 +1555,8 @@ TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) { VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); - VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, - VecOp, false); + VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp, + false); VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0)); VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp); EXPECT_TRUE(isa(&EVLRecipe)); From d60f933d83f1976094996753e050c811bfd86045 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Fri, 14 Mar 2025 05:54:29 -0700 Subject: [PATCH 04/11] !fixup, independent from #130880. 
--- llvm/test/Transforms/LoopVectorize/vplan-printing.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 00d8de67a3b40..0dc7b6b0188ca 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -167,7 +167,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>) ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors @@ -234,7 +234,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop) +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop) ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors From 07b5790253c965c0393ab32c87a0654b9359f754 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Fri, 14 Mar 2025 06:25:35 -0700 Subject: [PATCH 05/11] !fixup, 
Address comments. --- llvm/lib/Transforms/Vectorize/VPlan.h | 5 +++-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++---- .../LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 3f5a074919c8a..5f8da4759e30d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -792,7 +792,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { /// Set fast-math flags for this recipe. void setFastMathFlags(FastMathFlags FMFs) { - OpType = OperationType::FPMathOp; + assert(OpType == OperationType::FPMathOp); this->FMFs = FMFs; } @@ -2314,7 +2314,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { // The inloop reduction may across multiple scalar instruction and the // underlying instruction may not contains the corresponding flags. Set the // flags explicit from the redurrence descriptor. - setFastMathFlags(R.getFastMathFlags()); + if (isa(I)) + setFastMathFlags(R.getFastMathFlags()); } public: diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 0c93023ac742f..671964bb0eb08 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2351,8 +2351,7 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; getChainOp()->printAsOperand(O, SlotTracker); O << " +"; - if (isa_and_nonnull(getUnderlyingValue())) - printFlags(O); + printFlags(O); O << " reduce." 
<< Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); if (isConditional()) { @@ -2373,8 +2372,7 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; getChainOp()->printAsOperand(O, SlotTracker); O << " +"; - if (isa_and_nonnull(getUnderlyingValue())) - printFlags(O); + printFlags(O); O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); O << ", "; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 3594b3f047363..f1ca934cc133a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -101,7 +101,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) +; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + nsw vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) ; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> From 48454c6a07fed1e5759cb83c12c70dcd6abd5702 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Fri, 14 Mar 2025 07:11:00 -0700 Subject: [PATCH 06/11] !fixup, Update comments. 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5f8da4759e30d..7285b9413cff2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2311,9 +2311,10 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { IsConditional = true; addOperand(CondOp); } - // The inloop reduction may across multiple scalar instruction and the - // underlying instruction may not contains the corresponding flags. Set the - // flags explicit from the redurrence descriptor. + + // In-loop reductions may comprise of multiple scalar instructions, and the + // underlying instruction may not contain the same flags as the + // recurrence descriptor, so set the flags explicitly. if (isa(I)) setFastMathFlags(R.getFastMathFlags()); } From 36b40fccbbf4d2f21226011059b8681e7aabd712 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Fri, 14 Mar 2025 08:41:57 -0700 Subject: [PATCH 07/11] !fixup, Add description of the assertion. --- llvm/lib/Transforms/Vectorize/VPlan.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 7285b9413cff2..6df03c91820c3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -792,7 +792,8 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { /// Set fast-math flags for this recipe. void setFastMathFlags(FastMathFlags FMFs) { - assert(OpType == OperationType::FPMathOp); + assert(OpType == OperationType::FPMathOp && + "We should only set the FastMathFlags when the recipes is FPFathOP"); this->FMFs = FMFs; } From 0473b058953056c0c22748bfbf5caccd9945f6ce Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Fri, 14 Mar 2025 17:23:57 -0700 Subject: [PATCH 08/11] !fixup, Set FMF for VPReductionRecipe in ctor directly. 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 6 ++-- llvm/lib/Transforms/Vectorize/VPlan.h | 36 ++++++++----------- .../RISCV/vplan-vp-intrinsics-reduction.ll | 4 +-- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 131f2e245c461..cb860a472d8f7 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9799,9 +9799,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (CM.blockNeedsPredicationForAnyReason(BB)) CondOp = RecipeBuilder.getBlockInMask(BB); - auto *RedRecipe = - new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp, - CondOp, CM.useOrderedReductions(RdxDesc)); + auto *RedRecipe = new VPReductionRecipe( + RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp, + CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc()); // Append the recipe to the end of the VPBasicBlock because we need to // ensure that it comes after all of it's inputs, including CondOp. // Delete CurrentLink as it will be invalid if its operand is replaced diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 6df03c91820c3..b75618a09eede 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -790,13 +790,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { } } - /// Set fast-math flags for this recipe. 
- void setFastMathFlags(FastMathFlags FMFs) { - assert(OpType == OperationType::FPMathOp && - "We should only set the FastMathFlags when the recipes is FPFathOP"); - this->FMFs = FMFs; - } - CmpInst::Predicate getPredicate() const { assert(OpType == OperationType::Cmp && "recipe doesn't have a compare predicate"); @@ -2304,35 +2297,32 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { protected: VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, - Instruction *I, ArrayRef Operands, - VPValue *CondOp, bool IsOrdered) - : VPRecipeWithIRFlags(SC, Operands, *I), RdxDesc(R), + FastMathFlags FMF, Instruction *I, + ArrayRef Operands, VPValue *CondOp, + bool IsOrdered, DebugLoc DL) + : VPRecipeWithIRFlags(SC, Operands, FMF, DL), RdxDesc(R), IsOrdered(IsOrdered) { if (CondOp) { IsConditional = true; addOperand(CondOp); } - - // In-loop reductions may comprise of multiple scalar instructions, and the - // underlying instruction may not contain the same flags as the - // recurrence descriptor, so set the flags explicitly. - if (isa(I)) - setFastMathFlags(R.getFastMathFlags()); + setUnderlyingValue(I); } public: VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, - bool IsOrdered) - : VPReductionRecipe(VPDef::VPReductionSC, R, I, + bool IsOrdered, DebugLoc DL = {}) + : VPReductionRecipe(VPDef::VPReductionSC, R, R.getFastMathFlags(), I, ArrayRef({ChainOp, VecOp}), CondOp, - IsOrdered) {} + IsOrdered, DL) {} ~VPReductionRecipe() override = default; VPReductionRecipe *clone() override { return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(), - getVecOp(), getCondOp(), IsOrdered); + getVecOp(), getCondOp(), IsOrdered, + getDebugLoc()); } static inline bool classof(const VPRecipeBase *R) { @@ -2382,12 +2372,14 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { /// The Operands are {ChainOp, VecOp, EVL, [Condition]}. 
class VPReductionEVLRecipe : public VPReductionRecipe { public: - VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp) + VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, + DebugLoc DL = {}) : VPReductionRecipe( VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(), + R.getFastMathFlags(), cast_or_null(R.getUnderlyingValue()), ArrayRef({R.getChainOp(), R.getVecOp(), &EVL}), CondOp, - R.isOrdered()) {} + R.isOrdered(), DL) {} ~VPReductionEVLRecipe() override = default; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index f1ca934cc133a..2be823a8ef9c5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -101,7 +101,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + nsw vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) +; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reassoc nsz arcp contract afn vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) ; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -196,7 +196,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; NO-VP-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; NO-VP-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]> -; 
NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reduce.add (ir<[[LD1]]>) +; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reassoc nsz arcp contract afn reduce.add (ir<[[LD1]]>) ; NO-VP-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> ; NO-VP-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; NO-VP-INLOOP-NEXT: No successors From 942a14ddb1cc5b1acec707f51f707880d10c31a9 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Sun, 16 Mar 2025 17:05:11 -0700 Subject: [PATCH 09/11] !fixup, Remove unneeded checks and check undelryingInstr is FPMO or not. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 15 +++++++-------- .../RISCV/vplan-vp-intrinsics-reduction.ll | 4 ++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 671964bb0eb08..7e037efaf1aa2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2224,8 +2224,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { RecurKind Kind = RdxDesc.getRecurrenceKind(); // Propagate the fast-math flags carried by the underlying instruction. IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); - if (hasFastMathFlags()) - State.Builder.setFastMathFlags(getFastMathFlags()); + State.Builder.setFastMathFlags(getFastMathFlags()); State.setDebugLocFrom(getDebugLoc()); Value *NewVecOp = State.get(getVecOp()); if (VPValue *Cond = getCondOp()) { @@ -2276,8 +2275,7 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) { // Propagate the fast-math flags carried by the underlying instruction. 
IRBuilderBase::FastMathFlagGuard FMFGuard(Builder); const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor(); - if (hasFastMathFlags()) - Builder.setFastMathFlags(getFastMathFlags()); + Builder.setFastMathFlags(getFastMathFlags()); RecurKind Kind = RdxDesc.getRecurrenceKind(); Value *Prev = State.get(getChainOp(), /*IsScalar*/ true); @@ -2314,8 +2312,7 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, Type *ElementTy = Ctx.Types.inferScalarType(this); auto *VectorTy = cast(toVectorTy(ElementTy, VF)); unsigned Opcode = RdxDesc.getOpcode(); - FastMathFlags FMFs = - hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(); + FastMathFlags FMFs = getFastMathFlags(); // TODO: Support any-of and in-loop reductions. assert( @@ -2351,7 +2348,8 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; getChainOp()->printAsOperand(O, SlotTracker); O << " +"; - printFlags(O); + if (isa(getUnderlyingInstr())) + printFlags(O); O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); if (isConditional()) { @@ -2372,7 +2370,8 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; getChainOp()->printAsOperand(O, SlotTracker); O << " +"; - printFlags(O); + if (isa(getUnderlyingInstr())) + printFlags(O); O << " vp.reduce." 
<< Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); O << ", "; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 2be823a8ef9c5..3594b3f047363 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -101,7 +101,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reassoc nsz arcp contract afn vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) +; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) ; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -196,7 +196,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; NO-VP-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; NO-VP-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]> -; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reassoc nsz arcp contract afn reduce.add (ir<[[LD1]]>) +; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reduce.add (ir<[[LD1]]>) ; NO-VP-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> ; NO-VP-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; NO-VP-INLOOP-NEXT: No successors From 
abd65e99cac04aeed5001686f3188f507367fc97 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Sun, 16 Mar 2025 21:34:18 -0700 Subject: [PATCH 10/11] !fixup, Remove FMF from recipes after add checks of underlying instruction. --- llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index b75618a09eede..23193da8410cd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2297,10 +2297,9 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { protected: VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, - FastMathFlags FMF, Instruction *I, - ArrayRef Operands, VPValue *CondOp, - bool IsOrdered, DebugLoc DL) - : VPRecipeWithIRFlags(SC, Operands, FMF, DL), RdxDesc(R), + Instruction *I, ArrayRef Operands, + VPValue *CondOp, bool IsOrdered, DebugLoc DL) + : VPRecipeWithIRFlags(SC, Operands, R.getFastMathFlags(), DL), RdxDesc(R), IsOrdered(IsOrdered) { if (CondOp) { IsConditional = true; @@ -2313,7 +2312,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL = {}) - : VPReductionRecipe(VPDef::VPReductionSC, R, R.getFastMathFlags(), I, + : VPReductionRecipe(VPDef::VPReductionSC, R, I, ArrayRef({ChainOp, VecOp}), CondOp, IsOrdered, DL) {} @@ -2376,7 +2375,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe { DebugLoc DL = {}) : VPReductionRecipe( VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(), - R.getFastMathFlags(), cast_or_null(R.getUnderlyingValue()), ArrayRef({R.getChainOp(), R.getVecOp(), &EVL}), CondOp, R.isOrdered(), DL) {} From 6c6ccb245a1e11a58507c870e920407f6ba9b923 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Mon, 17 Mar 2025 05:14:28 -0700 Subject: [PATCH 11/11] !fixup, Only set FMF for FMOperators and 
update after merging main. --- llvm/lib/Transforms/Vectorize/VPlan.h | 8 ++++++-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++---- llvm/test/Transforms/LoopVectorize/vplan-printing.ll | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index b28c1aad9abb2..ba24143e0b5b6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2249,8 +2249,12 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef<VPValue *> Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL) - : VPRecipeWithIRFlags(SC, Operands, R.getFastMathFlags(), DL), RdxDesc(R), - IsOrdered(IsOrdered) { + : VPRecipeWithIRFlags(SC, Operands, + isa_and_nonnull<FPMathOperator>(I) + ? R.getFastMathFlags() + : FastMathFlags(), + DL), + RdxDesc(R), IsOrdered(IsOrdered) { if (CondOp) { IsConditional = true; addOperand(CondOp); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index bea8927cc3cfd..930ffe8d7496d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2397,8 +2397,7 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; getChainOp()->printAsOperand(O, SlotTracker); O << " +"; - if (isa<FPMathOperator>(getUnderlyingInstr())) - printFlags(O); + printFlags(O); O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); if (isConditional()) { @@ -2419,8 +2418,7 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; getChainOp()->printAsOperand(O, SlotTracker); O << " +"; - if (isa<FPMathOperator>(getUnderlyingInstr())) - printFlags(O); + printFlags(O); O << " vp.reduce."
<< Instruction::getOpcodeName(RdxDesc.getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); O << ", "; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 4493257e8aef0..207cb8b4a0d30 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -167,7 +167,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>) +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors @@ -234,7 +234,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop) +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop) ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors