Skip to content

Commit a43d999

Browse files
committed
[VPlan] Check if only first part is used for all per-part VPInsts.
Apply the onlyFirstPartUsed logic generally to all per-part VPInstructions. Note that the test changes remove the second part of an unsued first-order recurrence splice.
1 parent 44aecca commit a43d999

File tree

5 files changed

+8
-13
lines changed

5 files changed

+8
-13
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,9 +324,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
324324

325325
if (Instruction::isBinaryOp(getOpcode())) {
326326
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
327-
if (Part != 0 && vputils::onlyFirstPartUsed(this))
328-
return State.get(this, 0, OnlyFirstLaneUsed);
329-
330327
Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
331328
Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
332329
auto *Res =
@@ -628,6 +625,7 @@ void VPInstruction::execute(VPTransformState &State) {
628625
canGenerateScalarForFirstLane() &&
629626
(vputils::onlyFirstLaneUsed(this) || isVectorToScalar());
630627
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
628+
bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this);
631629
for (unsigned Part = 0; Part < State.UF; ++Part) {
632630
if (GeneratesPerAllLanes) {
633631
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
@@ -639,6 +637,13 @@ void VPInstruction::execute(VPTransformState &State) {
639637
continue;
640638
}
641639

640+
if (Part != 0 && OnlyFirstPartUsed && hasResult()) {
641+
Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly);
642+
State.set(this, Part0, Part,
643+
/*IsScalar*/ GeneratesPerFirstLaneOnly);
644+
continue;
645+
}
646+
642647
Value *GeneratedValue = generatePerPart(State, Part);
643648
if (!hasResult())
644649
continue;

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
121121
; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64>
122122
; CHECK-NEXT: [[TMP9]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64>
123123
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[TMP8]], <2 x i32> <i32 1, i32 2>
124-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 1, i32 2>
125124
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
126125
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
127126
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -186,7 +185,6 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
186185
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
187186
; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64>
188187
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
189-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
190188
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
191189
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
192190
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]

llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,6 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C
235235
; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <16 x i64> zeroinitializer, [[VEC_IND]]
236236
; CHECK-NEXT: [[TMP1]] = sub nsw <16 x i64> zeroinitializer, [[STEP_ADD]]
237237
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VECTOR_RECUR]], <16 x i64> [[TMP0]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
238-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i64> [[TMP0]], <16 x i64> [[TMP1]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
239238
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i64> [[TMP1]], i32 15
240239
; CHECK-NEXT: store i64 [[TMP4]], ptr [[GEP]], align 8
241240
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32

llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,6 @@ define i32 @PR27246() {
912912
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
913913
; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
914914
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
915-
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
916915
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
917916
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 -4, i32 -4, i32 -4, i32 -4>
918917
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -1121,7 +1120,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
11211120
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]], i32 2
11221121
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[VECTOR_RECUR_EXTRACT]], i32 3
11231122
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP34]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1124-
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
11251123
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
11261124
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
11271125
; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -1393,7 +1391,6 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
13931391
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
13941392
; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
13951393
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1396-
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
13971394
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
13981395
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
13991396
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
@@ -2572,7 +2569,6 @@ define void @sink_dead_inst(ptr %a) {
25722569
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
25732570
; UNROLL-NO-IC-NEXT: [[TMP5]] = zext <4 x i16> [[TMP3]] to <4 x i32>
25742571
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR2]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2575-
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
25762572
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP2]], <i16 5, i16 5, i16 5, i16 5>
25772573
; UNROLL-NO-IC-NEXT: [[TMP9]] = add <4 x i16> [[TMP3]], <i16 5, i16 5, i16 5, i16 5>
25782574
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -3491,7 +3487,6 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
34913487
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
34923488
; UNROLL-NO-IC-NEXT: [[TMP7]] = zext <4 x i16> [[TMP5]] to <4 x i32>
34933489
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3494-
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
34953490
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]]
34963491
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]]
34973492
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
@@ -3665,7 +3660,6 @@ define void @unused_recurrence(ptr %a) {
36653660
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
36663661
; UNROLL-NO-IC-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], <i16 5, i16 5, i16 5, i16 5>
36673662
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3668-
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
36693663
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
36703664
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
36713665
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024

llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,6 @@ define i64 @constant_folded_previous_value() {
172172
; CHECK-VF4UF2: vector.body
173173
; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i64> [ %vector.recur.init, %vector.ph ], [ shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %vector.body ]
174174
; CHECK-VF4UF2: %[[SPLICE1:.*]] = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> %vector.recur, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), i32 -1)
175-
; CHECK-VF4UF2: %[[SPLICE2:.*]] = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), i32 -1)
176175
; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body
177176
entry:
178177
br label %scalar.body

0 commit comments

Comments
 (0)