From cff1255304fbecba052bd46e725f51623bd983f3 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Fri, 17 Jan 2025 02:29:28 -0800 Subject: [PATCH 1/9] Revert "[LV][EVL] Address post-comments for disabling fixed-order recurrence with EVL tail folding. (NFC)" This reverts commit 44a005ff4aa5bcfd06749b5f9ec6bbd582a1ebb0. --- .../vectorize-force-tail-with-evl-fixed-order-recurrence.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll index 1dfda837f95a5..e9c60898a4694 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll @@ -12,8 +12,8 @@ ; RUN: -mtriple=riscv64 -mattr=+v,+f -S < %s| FileCheck %s --check-prefix=NO-VP ; FIXME: Fixed-order recurrence is not supported yet with EVL tail folding. -; The llvm.splice may occur unexpected behavior if the evl of the second-to-last -; iteration is not VF*UF. +; The llvm.splice may occurs unexpected behavior if the evl of the +; second-to-last iteration is not VF*UF. define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-LABEL: define void @first_order_recurrence( From 8fd5ea0ee68f6d45148ab004707a8bde534cb9f2 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Fri, 17 Jan 2025 02:29:48 -0800 Subject: [PATCH 2/9] Revert "[LV][EVL] Disable fixed-order recurrence idiom with EVL tail folding. (#122458)" This reverts commit 9720be95d63ce797437015d0f0edd10b02e80b7a. --- .../Transforms/Vectorize/LoopVectorize.cpp | 5 +- ...ce-tail-with-evl-fixed-order-recurrence.ll | 90 ++++++++----------- 2 files changed, 38 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f3bdc95713afa..55590084492cf 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1454,12 +1454,9 @@ class LoopVectorizationCostModel { // Override forced styles if needed. // FIXME: use actual opcode/data type for analysis here. // FIXME: Investigate opportunity for fixed vector factor. - // FIXME: support fixed-order recurrences by fixing splice of non VFxUF - // penultimate EVL. bool EVLIsLegal = UserIC <= 1 && IsScalableVF && TTI.hasActiveVectorLength(0, nullptr, Align()) && - !EnableVPlanNativePath && - Legal->getFixedOrderRecurrences().empty(); + !EnableVPlanNativePath; if (!EVLIsLegal) { // If for some reason EVL mode is unsupported, fallback to // DataWithoutLaneMask to try to vectorize the loop with folded tail diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll index e9c60898a4694..cec849419f069 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll @@ -11,10 +11,6 @@ ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ ; RUN: -mtriple=riscv64 -mattr=+v,+f -S < %s| FileCheck %s --check-prefix=NO-VP -; FIXME: Fixed-order recurrence is not supported yet with EVL tail folding. -; The llvm.splice may occurs unexpected behavior if the evl of the -; second-to-last iteration is not VF*UF. - define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-LABEL: define void @first_order_recurrence( ; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[TC:%.*]]) #[[ATTR0:[0-9]+]] { @@ -31,35 +27,31 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TC]], 1 ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 ; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP11]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 -; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: -; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 -; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-NEXT: [[TMP25:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP26:%.*]] = add zeroinitializer, [[TMP25]] -; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP26]] -; IF-EVL-NEXT: [[TMP27:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT2]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP15]], i32 4, [[TMP27]], poison) +; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1) -; IF-EVL-NEXT: [[TMP19:%.*]] = add nsw [[TMP16]], [[VP_OP_LOAD]] +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP16]], [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0 -; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP19]], ptr [[TMP18]], i32 4, [[TMP27]]) -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]] +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -180,7 +172,6 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TC]], 1 ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() @@ -191,30 +182,27 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 4 ; IF-EVL-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1 ; IF-EVL-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP14]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 -; IF-EVL-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: -; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP16:%.*]] = add i64 [[EVL_BASED_IV]], 0 -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 -; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-NEXT: [[TMP32:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP33:%.*]] = add zeroinitializer, [[TMP32]] -; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP33]] -; IF-EVL-NEXT: [[TMP34:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT4]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP16]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP18]], i32 4, [[TMP34]], poison) +; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP15]]) ; IF-EVL-NEXT: [[TMP19]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1) ; IF-EVL-NEXT: [[TMP20:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP19]], i32 -1) -; IF-EVL-NEXT: [[TMP23:%.*]] = add nsw [[TMP19]], [[TMP20]] +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP19]], [[TMP20]], splat (i1 true), i32 [[TMP15]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP16]] ; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP21]], i32 0 -; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP23]], ptr [[TMP22]], i32 4, [[TMP34]]) -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]] +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP]], ptr align 4 [[TMP22]], splat (i1 true), i32 [[TMP15]]) +; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP15]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -230,12 +218,12 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL: [[SCALAR_PH]]: ; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT6:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] ; IF-EVL: [[FOR_BODY]]: ; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP31:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT6]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] +; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]] ; IF-EVL-NEXT: [[TMP31]] = load i32, ptr [[ARRAYIDX]], align 4 ; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[FOR1]], [[FOR2]] @@ -354,7 +342,6 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TC]], 1 ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() @@ -369,33 +356,30 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 4 ; IF-EVL-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1 ; IF-EVL-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement poison, i32 11, i32 [[TMP17]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 -; IF-EVL-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: -; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP19:%.*]] = add i64 [[EVL_BASED_IV]], 0 -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 -; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-NEXT: [[TMP39:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP40:%.*]] = add zeroinitializer, [[TMP39]] -; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP40]] -; IF-EVL-NEXT: [[TMP41:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT6]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP19]] ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP21]], i32 4, [[TMP41]], poison) +; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP21]], splat (i1 true), i32 [[TMP18]]) ; IF-EVL-NEXT: [[TMP22]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1) ; IF-EVL-NEXT: [[TMP23]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP22]], i32 -1) ; IF-EVL-NEXT: [[TMP24:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR4]], [[TMP23]], i32 -1) -; IF-EVL-NEXT: [[TMP27:%.*]] = add nsw [[TMP23]], [[TMP24]] -; IF-EVL-NEXT: [[TMP42:%.*]] = add [[TMP27]], [[TMP22]] +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP23]], [[TMP24]], splat (i1 true), i32 [[TMP18]]) +; IF-EVL-NEXT: [[VP_OP5:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP]], [[TMP22]], splat (i1 true), i32 [[TMP18]]) ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP19]] ; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP25]], i32 0 -; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP42]], ptr [[TMP26]], i32 4, [[TMP41]]) -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]] +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP5]], ptr align 4 [[TMP26]], splat (i1 true), i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP18]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP27]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -415,14 +399,14 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL: [[SCALAR_PH]]: ; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT7]], %[[MIDDLE_BLOCK]] ], [ 11, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT7]], %[[MIDDLE_BLOCK]] ], [ 11, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] ; IF-EVL: [[FOR_BODY]]: ; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP38:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT9]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR3:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT10]], %[[SCALAR_PH]] ], [ [[FOR2]], %[[FOR_BODY]] ] +; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT8]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] +; IF-EVL-NEXT: [[FOR3:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT9]], %[[SCALAR_PH]] ], [ [[FOR2]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]] ; IF-EVL-NEXT: [[TMP38]] = load i32, ptr [[ARRAYIDX]], align 4 ; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[FOR2]], [[FOR3]] From 445db491218545df7c00bef5e4655c1be0a7870c Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Mon, 20 Jan 2025 05:55:47 -0800 Subject: [PATCH 3/9] Step 1: Create scalar phi to save previous EVL --- llvm/lib/Transforms/Vectorize/VPlan.h | 1 + .../Transforms/Vectorize/VPlanTransforms.cpp | 23 +++++++++++++++++++ .../Transforms/Vectorize/VPlanVerifier.cpp | 3 ++- ...ce-tail-with-evl-fixed-order-recurrence.ll | 12 +++++++--- 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0f1fa517be000..a19930d4b147f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -534,6 +534,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPWidenPointerInductionSC: case VPRecipeBase::VPReductionPHISC: case VPRecipeBase::VPScalarCastSC: + case VPRecipeBase::VPScalarPHISC: case VPRecipeBase::VPPartialReductionSC: return true; case VPRecipeBase::VPBranchOnMaskSC: diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b09933cd0e186..524de3e34a01d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1713,6 +1713,29 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { VPTypeAnalysis TypeInfo(CanonicalIVType); LLVMContext &Ctx = CanonicalIVType->getContext(); VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx)); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); + + // Create a scalar phi to track the previous EVL if fixed-order recurrence is + // contained. + bool ContainsFORs = + any_of(Header->phis(), IsaPred); + if (ContainsFORs) { + // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL. + VPValue *MaxEVL = &Plan.getVF(); + // Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer. + if (unsigned VFSize = + TypeInfo.inferScalarType(MaxEVL)->getScalarSizeInBits(); + VFSize != 32) { + MaxEVL = new VPScalarCastRecipe( + VFSize > 32 ? Instruction::Trunc : Instruction::ZExt, MaxEVL, + Type::getInt32Ty(Ctx), DebugLoc()); + VPBasicBlock *Preheader = LoopRegion->getPreheaderVPBB(); + Preheader->appendRecipe(cast(MaxEVL)); + } + auto *PrevEVL = new VPScalarPHIRecipe(MaxEVL, &EVL, DebugLoc(), "prev.evl"); + PrevEVL->insertBefore(*Header, Header->getFirstNonPhi()); + } for (VPUser *U : to_vector(Plan.getVF().users())) { if (auto *R = dyn_cast(U)) diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index a4b309d6dcd9f..1b3b69ea6a13d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -143,7 +143,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { }) .Case( [&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); }) - .Case( + .Case( [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) .Case( [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); }) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll index cec849419f069..19277986874c1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll @@ -29,6 +29,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; IF-EVL-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP8]] to i32 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 @@ -38,8 +39,9 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP25]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP12]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 0 @@ -174,6 +176,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; IF-EVL-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP8]] to i32 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 @@ -188,8 +191,9 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP32]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP15]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP16:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP16]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0 @@ -344,6 +348,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; IF-EVL-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP8]] to i32 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 @@ -363,8 +368,9 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP39]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP18]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP19:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP19]] ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 0 From facd896d76f7f5e7717e286404fccd019bd68ab2 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Thu, 23 Jan 2025 00:21:19 -0800 Subject: [PATCH 4/9] Step 2: Transform llvm.splice to vp.splice This version depend on branch explicit-FOR-op. --- llvm/lib/Analysis/VectorUtils.cpp | 2 ++ .../Transforms/Vectorize/VPlanTransforms.cpp | 22 ++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 53150684b4e4a..91ba68fe03324 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -163,6 +163,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, case Intrinsic::umul_fix: case Intrinsic::umul_fix_sat: return (ScalarOpdIdx == 2); + case Intrinsic::experimental_vp_splice: + return ScalarOpdIdx == 2 || ScalarOpdIdx == 4 || ScalarOpdIdx == 5; default: return false; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 524de3e34a01d..09a00e2488066 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1658,11 +1658,13 @@ void VPlanTransforms::addActiveLaneMask( /// \p TypeInfo VPlan-based type analysis. /// \p AllOneMask The vector mask parameter of vector-predication intrinsics. /// \p EVL The explicit vector length parameter of vector-predication -/// intrinsics. +/// intrinsics. +/// \p PrevEVL The explicit vector length of the previous iteration. static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask, VPRecipeBase &CurRecipe, VPTypeAnalysis &TypeInfo, - VPValue &AllOneMask, VPValue &EVL) { + VPValue &AllOneMask, VPValue &EVL, + VPValue *PrevEVL) { using namespace llvm::VPlanPatternMatch; auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * { assert(OrigMask && "Unmasked recipe when folding tail"); @@ -1690,6 +1692,15 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask, Sel->getDebugLoc()); }) .Case([&](VPInstruction *VPI) -> VPRecipeBase * { + if (VPI->getOpcode() == VPInstruction::FirstOrderRecurrenceSplice) { + assert(PrevEVL && "Fixed-order recurrences require previous EVL"); + SmallVector Ops(VPI->operands()); + Ops.append({&AllOneMask, PrevEVL, &EVL}); + return new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splice, + Ops, TypeInfo.inferScalarType(VPI), + VPI->getDebugLoc()); + } + VPValue *LHS, *RHS; // Transform select with a header mask condition // select(header_mask, LHS, RHS) @@ -1718,6 +1729,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { // Create a scalar phi to track the previous EVL if fixed-order recurrence is // contained. + VPScalarPHIRecipe *PrevEVL = nullptr; bool ContainsFORs = any_of(Header->phis(), IsaPred); if (ContainsFORs) { @@ -1733,7 +1745,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { VPBasicBlock *Preheader = LoopRegion->getPreheaderVPBB(); Preheader->appendRecipe(cast(MaxEVL)); } - auto *PrevEVL = new VPScalarPHIRecipe(MaxEVL, &EVL, DebugLoc(), "prev.evl"); + PrevEVL = new VPScalarPHIRecipe(MaxEVL, &EVL, DebugLoc(), "prev.evl"); PrevEVL->insertBefore(*Header, Header->getFirstNonPhi()); } @@ -1747,8 +1759,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) { for (VPUser *U : collectUsersRecursively(HeaderMask)) { auto *CurRecipe = cast(U); - VPRecipeBase *EVLRecipe = - createEVLRecipe(HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL); + VPRecipeBase *EVLRecipe = createEVLRecipe( + HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL, PrevEVL); if (!EVLRecipe) continue; From ebbdb92499307f4602a5341c381a5193e1444d95 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Thu, 23 Jan 2025 01:14:21 -0800 Subject: [PATCH 5/9] Step 2.1: Directly create -1 in createEVLRecipe. --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 09a00e2488066..54e4a8f22277c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1694,8 +1694,11 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask, .Case([&](VPInstruction *VPI) -> VPRecipeBase * { if (VPI->getOpcode() == VPInstruction::FirstOrderRecurrenceSplice) { assert(PrevEVL && "Fixed-order recurrences require previous EVL"); + VPValue *MinusOneVPV = VPI->getParent()->getPlan()->getOrAddLiveIn( + ConstantInt::getSigned(Type::getInt32Ty(TypeInfo.getContext()), + -1)); SmallVector Ops(VPI->operands()); - Ops.append({&AllOneMask, PrevEVL, &EVL}); + Ops.append({MinusOneVPV, &AllOneMask, PrevEVL, &EVL}); return new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splice, Ops, TypeInfo.inferScalarType(VPI), VPI->getDebugLoc()); From 24871c2b176a6fe77ccf1590608945ec74999e01 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Thu, 23 Jan 2025 01:17:50 -0800 Subject: [PATCH 6/9] test update --- ...ize-force-tail-with-evl-fixed-order-recurrence.ll | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll index 19277986874c1..baaadac9169b1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll @@ -46,7 +46,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1) +; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP12]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP16]], [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0 @@ -198,8 +198,8 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP16]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP15]]) -; IF-EVL-NEXT: [[TMP19]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1) -; IF-EVL-NEXT: [[TMP20:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP19]], i32 -1) +; IF-EVL-NEXT: [[TMP19]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP15]]) +; IF-EVL-NEXT: [[TMP20:%.*]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP19]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP15]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP19]], [[TMP20]], splat (i1 true), i32 [[TMP15]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP16]] ; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP21]], i32 0 @@ -375,9 +375,9 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP19]] ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP21]], splat (i1 true), i32 [[TMP18]]) -; IF-EVL-NEXT: [[TMP22]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1) -; IF-EVL-NEXT: [[TMP23]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP22]], i32 -1) -; IF-EVL-NEXT: [[TMP24:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR4]], [[TMP23]], i32 -1) +; IF-EVL-NEXT: [[TMP22]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP23]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP22]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP24:%.*]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR4]], [[TMP23]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP18]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP23]], [[TMP24]], splat (i1 true), i32 [[TMP18]]) ; IF-EVL-NEXT: [[VP_OP5:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP]], [[TMP22]], splat (i1 true), i32 [[TMP18]]) ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP19]] From c0c3e323e246a83dcb0037b3fb183fce6c0a0ea3 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Mon, 10 Feb 2025 06:10:14 -0800 Subject: [PATCH 7/9] Add test case to show VPlan --- ...an-vp-intrinsics-fixed-order-recurrence.ll | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll new file mode 100644 index 0000000000000..45047a91a78ac --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll @@ -0,0 +1,75 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s + +define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { +; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1' +; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI +; +; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' { +; IF-EVL-NEXT: Live-in vp<[[VF:%[0-9]+]]> = VF +; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; IF-EVL-NEXT: Live-in ir<%TC> = original trip-count +; IF-EVL-EMPTY: +; IF-EVL: ir-bb: +; IF-EVL-NEXT: Successor(s): vector.ph +; IF-EVL-EMPTY: +; IF-EVL: vector.ph: +; IF-EVL-NEXT: SCALAR-CAST vp<[[VF32:%[0-9]+]]> = trunc vp<[[VF]]> to i32 +; IF-EVL-NEXT: Successor(s): vector loop +; IF-EVL-EMPTY: +; IF-EVL: vector loop: { +; IF-EVL-NEXT: vector.body: +; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> +; IF-EVL-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<[[FOR_PHI:%.+]]> = phi ir<33>, ir<[[LD:%.+]]> +; IF-EVL-NEXT: SCALAR-PHI vp<[[PREV_EVL:%.+]]> = phi vp<[[VF32]]>, vp<[[EVL:%.+]]> +; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%TC>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[EVL]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> +; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%A>, vp<[[ST]] +; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; IF-EVL-NEXT: WIDEN ir<[[LD]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SPLICE:%[0-9]+]]> = call llvm.experimental.vp.splice(ir<[[FOR_PHI]]>, ir<[[LD]]>, ir<-1>, ir, vp<[[PREV_EVL]]>, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw vp<[[SPLICE]]>, ir<[[LD]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%B>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ADD]]>, vp<[[EVL]]> +; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> +; IF-EVL-NEXT: No successors +; IF-EVL-NEXT: } +; IF-EVL-NEXT: Successor(s): middle.block +; IF-EVL-EMPTY: +; IF-EVL: middle.block: +; IF-EVL-NEXT: EMIT vp<[[RESUME_EXTRACT:%.+]]> = extract-from-end ir<[[LD]]>, ir<1> +; IF-EVL-NEXT: EMIT branch-on-cond ir +; IF-EVL-NEXT: Successor(s): ir-bb, scalar.ph + +entry: + br label %for.body + +for.body: + %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %for.body ] + %for1 = phi i32 [ 33, %entry ], [ %0, %for.body ] + %arrayidx = getelementptr inbounds nuw i32, ptr %A, i64 %indvars + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %for1, %0 + %arrayidx2 = getelementptr inbounds nuw i32, ptr %B, i64 %indvars + store i32 %add, ptr %arrayidx2, align 4 + %indvars.next = add nuw nsw i64 %indvars, 1 + %exitcond.not = icmp eq i64 %indvars.next, %TC + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 true} From 303d0f42b4413882810a98bc0644a389f3c7284f Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Mon, 17 Feb 2025 06:28:57 -0800 Subject: [PATCH 8/9] Refine comments --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 54e4a8f22277c..3d1de68c83747 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1658,8 +1658,9 @@ void VPlanTransforms::addActiveLaneMask( /// \p TypeInfo VPlan-based type analysis. /// \p AllOneMask The vector mask parameter of vector-predication intrinsics. /// \p EVL The explicit vector length parameter of vector-predication -/// intrinsics. -/// \p PrevEVL The explicit vector length of the previous iteration. +/// intrinsics. +/// \p PrevEVL The explicit vector length of the previous iteration. Only +/// required if \p CurRecipe is a VPInstruction::FirstOrderRecurrenceSplice. static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask, VPRecipeBase &CurRecipe, VPTypeAnalysis &TypeInfo, From 26166ae6328658f25dbd9e08f4d7d54819890d39 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Thu, 27 Feb 2025 05:25:54 -0800 Subject: [PATCH 9/9] Rebase and update test case --- ...force-tail-with-evl-fixed-order-recurrence.ll | 16 ++++++++-------- ...vplan-vp-intrinsics-fixed-order-recurrence.ll | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll index baaadac9169b1..7886867a0bcd8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll @@ -47,7 +47,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP12]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP16]], [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = add nsw [[TMP16]], [[VP_OP_LOAD]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) @@ -200,7 +200,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP15]]) ; IF-EVL-NEXT: [[TMP19]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP15]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP19]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP15]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP19]], [[TMP20]], splat (i1 true), i32 [[TMP15]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = add nsw [[TMP19]], [[TMP20]] ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP16]] ; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP21]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP]], ptr align 4 [[TMP22]], splat (i1 true), i32 [[TMP15]]) @@ -378,8 +378,8 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP22]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP18]]) ; IF-EVL-NEXT: [[TMP23]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP22]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP18]]) ; IF-EVL-NEXT: [[TMP24:%.*]] = call @llvm.experimental.vp.splice.nxv4i32( [[VECTOR_RECUR4]], [[TMP23]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP18]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP23]], [[TMP24]], splat (i1 true), i32 [[TMP18]]) -; IF-EVL-NEXT: [[VP_OP5:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP]], [[TMP22]], splat (i1 true), i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP40:%.*]] = add nsw [[TMP23]], [[TMP24]] +; IF-EVL-NEXT: [[VP_OP5:%.*]] = add [[TMP40]], [[TMP22]] ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP19]] ; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP25]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP5]], ptr align 4 [[TMP26]], splat (i1 true), i32 [[TMP18]]) @@ -405,14 +405,14 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL: [[SCALAR_PH]]: ; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT7]], %[[MIDDLE_BLOCK]] ], [ 11, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT7]], %[[MIDDLE_BLOCK]] ], [ 11, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] ; IF-EVL: [[FOR_BODY]]: ; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP38:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT8]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR3:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT9]], %[[SCALAR_PH]] ], [ [[FOR2]], %[[FOR_BODY]] ] +; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT7]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] +; IF-EVL-NEXT: [[FOR3:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT8]], %[[SCALAR_PH]] ], [ [[FOR2]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]] ; IF-EVL-NEXT: [[TMP38]] = load i32, ptr [[ARRAYIDX]], align 4 ; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[FOR2]], [[FOR3]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll index 45047a91a78ac..0bcfe13832ae7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll @@ -35,7 +35,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-NEXT: WIDEN ir<[[LD]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SPLICE:%[0-9]+]]> = call llvm.experimental.vp.splice(ir<[[FOR_PHI]]>, ir<[[LD]]>, ir<-1>, ir, vp<[[PREV_EVL]]>, vp<[[EVL]]>) -; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw vp<[[SPLICE]]>, ir<[[LD]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw vp<[[SPLICE]]>, ir<[[LD]]> ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%B>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ADD]]>, vp<[[EVL]]>