Skip to content

[LV][VPlan] Add fast flags for selectRecipe #121023

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1812,11 +1812,10 @@ class VPHistogramRecipe : public VPRecipeBase {
};

/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPSingleDefRecipe {
struct VPWidenSelectRecipe : public VPRecipeWithIRFlags {
template <typename IterT>
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
: VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
I.getDebugLoc()) {}
: VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I) {}

~VPWidenSelectRecipe() override = default;

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,7 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent << "WIDEN-SELECT ";
printAsOperand(O, SlotTracker);
O << " = select ";
printFlags(O);
getOperand(0)->printAsOperand(O, SlotTracker);
O << ", ";
getOperand(1)->printAsOperand(O, SlotTracker);
Expand All @@ -1200,6 +1201,8 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
Value *Op1 = State.get(getOperand(2));
Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
State.set(this, Sel);
if (isa<FPMathOperator>(Sel))
setFlags(cast<Instruction>(Sel));
State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,8 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD2]], [[VEC_PHI1]]
; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP6]], <4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[TMP9]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]]
; CHECK-NEXT: [[TMP8]] = select nnan ninf <4 x i1> [[TMP6]], <4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[TMP9]] = select nnan ninf <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ define float @fp_reduction_max(ptr noalias %a, i64 %N) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
Expand Down Expand Up @@ -142,7 +142,7 @@ define float @fp_reduction_max(ptr noalias %a, i64 %N) {
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI5]], [[WIDE_LOAD6]]
; CHECK-NEXT: [[TMP11]] = select <4 x i1> [[TMP10]], <4 x float> [[VEC_PHI5]], <4 x float> [[WIDE_LOAD6]]
; CHECK-NEXT: [[TMP11]] = select fast <4 x i1> [[TMP10]], <4 x float> [[VEC_PHI5]], <4 x float> [[WIDE_LOAD6]]
; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
Expand Down
82 changes: 82 additions & 0 deletions llvm/test/Transforms/LoopVectorize/select-with-fastflags.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5

; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s

define void @select_with_fastmath_flags(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; CHECK-LABEL: define void @select_with_fastmath_flags(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+01)
; CHECK-NEXT: [[TMP7:%.*]] = select fast <4 x i1> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i32 0
; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[GEP]], align 4
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[IV]]
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP11]], 1.000000e+01
; CHECK-NEXT: [[COND:%.*]] = select fast i1 [[CMP4]], float [[ADD]], float [[TMP12]]
; CHECK-NEXT: [[GEP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store float [[COND]], ptr [[GEP11]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
%gep = getelementptr inbounds nuw float, ptr %b, i64 %iv
%0 = load float, ptr %gep, align 4
%gep3 = getelementptr inbounds nuw float, ptr %c, i64 %iv
%1 = load float, ptr %gep3, align 4
%cmp4 = fcmp fast ogt float %0, %1
%add = fadd fast float %0, 1.000000e+01
%cond = select fast i1 %cmp4, float %add, float %1
%gep11 = getelementptr inbounds nuw float, ptr %a, i64 %iv
store float %cond, ptr %gep11, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %exit, label %for.body

exit:
ret void
}

; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.
56 changes: 56 additions & 0 deletions llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,62 @@ exit:
ret i16 %for.1
}

define void @print_select_with_fastmath_flags(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; CHECK-LABEL: 'print_select_with_fastmath_flags'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1'
; CHECK-NEXT: Live-in vp<[[VFUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<%N> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT_EXIT:%.+]]>
; CHECK-NEXT: vp<[[ST:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%b>, vp<[[ST]]>
; CHECK-NEXT: vp<[[PTR1:%.+]]> = vector-pointer ir<[[GEP1]]>
; CHECK-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
; CHECK-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%c>, vp<[[ST]]>
; CHECK-NEXT: vp<[[PTR2:%.+]]> = vector-pointer ir<[[GEP2]]>
; CHECK-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>
; CHECK-NEXT: WIDEN ir<[[FCMP:%.+]]> = fcmp ogt ir<[[LD1]]>, ir<[[LD2]]>
; CHECK-NEXT: WIDEN ir<[[FADD:%.+]]> = fadd reassoc nnan ninf nsz arcp contract afn ir<[[LD1]]>, ir<1.000000e+01>
; CHECK-NEXT: WIDEN-SELECT ir<[[SELECT:%.+]]> = select reassoc nnan ninf nsz arcp contract afn ir<[[FCMP]]>, ir<[[FADD]]>, ir<[[LD2]]>
; CHECK-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds nuw ir<%a>, vp<[[ST]]>
; CHECK-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
; CHECK-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[SELECT]]>
; CHECK-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }

entry:
br label %for.body

for.body:
%iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
%gep = getelementptr inbounds nuw float, ptr %b, i64 %iv
%0 = load float, ptr %gep, align 4
%gep3 = getelementptr inbounds nuw float, ptr %c, i64 %iv
%1 = load float, ptr %gep3, align 4
%cmp4 = fcmp fast ogt float %0, %1
%add = fadd fast float %0, 1.000000e+01
%cond = select fast i1 %cmp4, float %add, float %1
%gep11 = getelementptr inbounds nuw float, ptr %a, i64 %iv
store float %cond, ptr %gep11, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %exit, label %for.body

exit:
ret void
}

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}

Expand Down
Loading