Skip to content

Commit 3ec6f80

Browse files
committed
[VPlan] Don't created GEP x, 0 for interleave group pointers.
The GEP with offet 0 is redundant, remove it. This addresses a TODO from 7f74651 ((#106431).
1 parent 871f69f commit 3ec6f80

22 files changed

+104
-184
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+12-15
Original file line numberDiff line numberDiff line change
@@ -2611,32 +2611,29 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
26112611
assert((!BlockInMask || !Group->isReverse()) &&
26122612
"Reversed masked interleave-group not supported.");
26132613

2614-
Value *Index;
2614+
VPValue *Addr = getAddr();
2615+
Value *ResAddr = State.get(Addr, VPLane(0));
2616+
if (auto *I = dyn_cast<Instruction>(ResAddr))
2617+
State.setDebugLocFrom(I->getDebugLoc());
2618+
26152619
// If the group is reverse, adjust the index to refer to the last vector lane
26162620
// instead of the first. We adjust the index from the first vector lane,
26172621
// rather than directly getting the pointer for lane VF - 1, because the
26182622
// pointer operand of the interleaved access is supposed to be uniform.
26192623
if (Group->isReverse()) {
26202624
Value *RuntimeVF =
26212625
getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2622-
Index = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2626+
Value *Index =
2627+
State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
26232628
Index = State.Builder.CreateMul(Index,
26242629
State.Builder.getInt32(Group->getFactor()));
26252630
Index = State.Builder.CreateNeg(Index);
2626-
} else {
2627-
// TODO: Drop redundant 0-index GEP as follow-up.
2628-
Index = State.Builder.getInt32(0);
2629-
}
26302631

2631-
VPValue *Addr = getAddr();
2632-
Value *ResAddr = State.get(Addr, VPLane(0));
2633-
if (auto *I = dyn_cast<Instruction>(ResAddr))
2634-
State.setDebugLocFrom(I->getDebugLoc());
2635-
2636-
bool InBounds = false;
2637-
if (auto *gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2638-
InBounds = gep->isInBounds();
2639-
ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2632+
bool InBounds = false;
2633+
if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2634+
InBounds = Gep->isInBounds();
2635+
ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2636+
}
26402637

26412638
State.setDebugLocFrom(Instr->getDebugLoc());
26422639
Value *PoisonVec = PoisonValue::get(VecTy);

llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
2929
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]]
3030
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]]
3131
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]]
32-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
33-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4
32+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
3433
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
3534
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
3635
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1

llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,11 @@ define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst
1414
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1515
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3
1616
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]]
17-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
1817
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1918
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
2019
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP10]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
2120
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i32> [[TMP11]], <12 x i32> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
22-
; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
21+
; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4
2322
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2423
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
2524
; CHECK-NEXT: br i1 [[TMP12]], label %middle.block, label %vector.body

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

+3-6
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
525525
; CHECK-UNORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
526526
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 0
527527
; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
528-
; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
529-
; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
528+
; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP12]], align 4
530529
; CHECK-UNORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
531530
; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
532531
; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
@@ -595,8 +594,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
595594
; CHECK-ORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
596595
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0
597596
; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]]
598-
; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0
599-
; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
597+
; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
600598
; CHECK-ORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
601599
; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
602600
; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
@@ -669,9 +667,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
669667
; CHECK-ORDERED-TF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
670668
; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0
671669
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
672-
; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
673670
; CHECK-ORDERED-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
674-
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP15]], i32 4, <vscale x 8 x i1> [[INTERLEAVED_MASK]], <vscale x 8 x float> poison)
671+
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP14]], i32 4, <vscale x 8 x i1> [[INTERLEAVED_MASK]], <vscale x 8 x float> poison)
675672
; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_MASKED_VEC]])
676673
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
677674
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1

llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,7 @@ define void @test_stride2_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
148148
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 2
149149
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 2
150150
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP4]]
151-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
152-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4
151+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
153152
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
154153
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[STRIDED_VEC]]
155154
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP2]]

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

+2-4
Original file line numberDiff line numberDiff line change
@@ -431,12 +431,10 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
431431
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
432432
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]]
433433
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]]
434-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0
435-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP9]], align 1
434+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1
436435
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
437436
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
438-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
439-
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP10]], align 1
437+
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP8]], align 1
440438
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
441439
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
442440
; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[STRIDED_VEC4]] to <8 x i32>

0 commit comments

Comments
 (0)