diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 55e90298b36cd..5e53adcaaeb41 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -918,6 +918,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { // value with the others blended into it. unsigned StartIndex = 0; + for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) { + // If a value's mask is used only by the blend then is can be deadcoded. + // TODO: Find the most expensive mask that can be deadcoded, or a mask + // that's used by multiple blends where it can be removed from them all. + VPValue *Mask = Blend->getMask(I); + if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) { + StartIndex = I; + break; + } + } + SmallVector OperandsWithMask; OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex)); @@ -996,6 +1007,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) && X == X1 && Y == Y1) { R.getVPSingleValue()->replaceAllUsesWith(X); + R.eraseFromParent(); return; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index f467f3cf262d2..93034f4dbe56e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -215,16 +215,14 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 50, i64 0), poison, zeroinitializer) ; TFCOMMON-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer ; TFCOMMON-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP7]]) -; TFCOMMON-NEXT: [[TMP9:%.*]] = xor [[TMP6]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFCOMMON-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], zeroinitializer, [[TMP8]] -; TFCOMMON-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], zeroinitializer +; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFCOMMON-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 -; TFCOMMON-NEXT: br i1 [[TMP13]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TFCOMMON-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 +; TFCOMMON-NEXT: br i1 [[TMP11]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFCOMMON: for.cond.cleanup: ; TFCOMMON-NEXT: ret void ; @@ -259,27 +257,23 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP12]], zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP13]]) ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP14]]) -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = xor [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP18]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP19]], zeroinitializer, [[TMP15]] -; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP20]], zeroinitializer, [[TMP16]] -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP23]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP21]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP24]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[TMP15]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP14]], [[TMP16]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP20]], i32 8, [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX_NEXT]], [[TMP26]] +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = add i64 [[INDEX_NEXT]], [[TMP22]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP27]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement [[TMP28]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP23]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFA_INTERLEAVE: for.cond.cleanup: ; TFA_INTERLEAVE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index f922873210b05..66d001498e457 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -1216,7 +1216,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 @@ -1226,41 +1226,39 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP15]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, [[TMP14]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], zeroinitializer -; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select [[TMP18]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD1]] -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP20]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP19]]) +; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], [[WIDE_MASKED_LOAD1]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP17]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ] ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP23]], 0.000000e+00 +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-TF-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP21]], 0.000000e+00 ; CHECK-ORDERED-TF-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK-ORDERED-TF: if.then: ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_INC]] ; CHECK-ORDERED-TF: for.inc: -; CHECK-ORDERED-TF-NEXT: [[PHI:%.*]] = phi float [ [[TMP24]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[PHI:%.*]] = phi float [ [[TMP22]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[RDX]] ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 8500f5fc194c6..731dadf9ca204 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -467,16 +467,15 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[TMP14:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP15]], poison) -; CHECK-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP16]], zeroinitializer, [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP18]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[WIDE_MASKED_GATHER]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP17]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -485,14 +484,14 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK: for.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP21]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP20]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[SRC]], align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[VAL_0:%.*]] = phi i32 [ [[TMP22]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] +; CHECK-NEXT: [[VAL_0:%.*]] = phi i32 [ [[TMP21]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store i32 [[VAL_0]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index d19ca172a8c0a..8b0c99b353c8b 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -33,18 +33,15 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -54,16 +51,16 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] ; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]] ; CHECK: if.then: -; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], 19 +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], 19 ; CHECK-NEXT: br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]] ; CHECK: if.else: -; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP16]], 4 +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP13]], 4 ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP10]], i32 4, i32 5 ; CHECK-NEXT: br label [[IF_END14]] ; CHECK: if.end14: @@ -112,3 +109,122 @@ for.end: ret i32 undef } +; As above but with multiple variables set per block. +define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { +; CHECK-LABEL: @multi_variable_if_nest( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP26]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[N]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], +; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI4]] +; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META9]], !noalias [[META12]] +; CHECK-NEXT: store <4 x i32> [[PREDPHI5]], ptr [[TMP6]], align 4, !alias.scope [[META12]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]] +; CHECK: if.then: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP16]], 19 +; CHECK-NEXT: br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP17]], 4 +; CHECK-NEXT: [[X_ELSE:%.*]] = select i1 [[CMP10]], i32 4, i32 5 +; CHECK-NEXT: [[Y_ELSE:%.*]] = select i1 [[CMP10]], i32 6, i32 11 +; CHECK-NEXT: br label [[IF_END14]] +; CHECK: if.end14: +; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 9, [[FOR_BODY]] ], [ 3, [[IF_THEN]] ], [ [[X_ELSE]], [[IF_ELSE]] ] +; CHECK-NEXT: [[Y_0:%.*]] = phi i32 [ 18, [[FOR_BODY]] ], [ 7, [[IF_THEN]] ], [ [[Y_ELSE]], [[IF_ELSE]] ] +; CHECK-NEXT: store i32 [[X_0]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: store i32 [[Y_0]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret i32 undef +; +entry: + %cmp26 = icmp sgt i32 %n, 0 + br i1 %cmp26, label %for.body, label %for.end + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %if.end14 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv + %1 = load i32, ptr %arrayidx2, align 4 + %cmp3 = icmp sgt i32 %0, %1 + br i1 %cmp3, label %if.then, label %if.end14 + +if.then: + %cmp6 = icmp sgt i32 %0, 19 + br i1 %cmp6, label %if.end14, label %if.else + +if.else: + %cmp10 = icmp slt i32 %1, 4 + %x.else = select i1 %cmp10, i32 4, i32 5 + %y.else = select i1 %cmp10, i32 6, i32 11 + br label %if.end14 + +if.end14: + %x.0 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %x.else, %if.else ] ; <------------- A PHI with 3 entries that we can still vectorize. + %y.0 = phi i32 [ 18, %for.body ], [ 7, %if.then ], [ %y.else, %if.else ] ; <------------- A PHI with 3 entries that we can still vectorize. + store i32 %x.0, ptr %arrayidx, align 4 + store i32 %y.0, ptr %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 undef +} diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index e9761a60fd6eb..0d5871e24c524 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -678,9 +678,8 @@ for.end: ; preds = %for.inc, %entry ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], ; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float> -; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer -; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer -; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]] +; CHECK-DAG: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer +; CHECK: %[[S1:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[ADD]], <4 x float> %[[SUB]] ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]] define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonly { entry: diff --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll index e571b624ed194..8d407c969b527 100644 --- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll @@ -49,8 +49,8 @@ for.end: ; CHECK: define void @phi_three_incoming_values( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK: [[PREDPHI:%.*]] = select <2 x i1> {{.*}}, <2 x i32> , <2 x i32> -; CHECK: [[PREDPHI7:%.*]] = select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> [[PREDPHI]] +; CHECK: [[PREDPHI:%.*]] = select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> +; CHECK: [[PREDPHI7:%.*]] = select <2 x i1> {{.*}}, <2 x i32> [[PREDPHI]], <2 x i32> ; CHECK: store <2 x i32> [[PREDPHI7]], ptr {{.*}} ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index c50bcf8ae88f5..2e111332ef6c4 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -587,9 +587,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; CHECK: pred.load.continue14: ; CHECK-NEXT: [[TMP46:%.*]] = phi <4 x i64> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP47:%.*]] = xor <4 x i1> [[TMP25]], -; CHECK-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP47]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP48]], <4 x i64> [[TMP24]], <4 x i64> [[TMP46]] +; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP46]], <4 x i64> [[TMP24]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> ; CHECK-NEXT: [[PREDPHI15]] = and <4 x i64> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index 8ee12cc2241c3..6407583061e60 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -111,17 +111,16 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], -; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i16> [[WIDE_LOAD]], <2 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> , <2 x i16> [[PREDPHI]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 -; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP11]], align 2 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0 +; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -304,17 +303,16 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP15]], -; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP17]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP18]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[TMP14]] ; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> , <2 x i16> [[PREDPHI]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i32 0 -; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP20]], align 2 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 +; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP19]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: