Skip to content

Commit 6b98134

Browse files
committed
[VPlan] Re-enable narrowing interleave groups with interleaving.
Remove the UF = 1 restriction introduced by 577631f building on top of 783a846, which allows updating all relevant users of the VF, VPScalarIVSteps in particular. This restores the full functionality of #106441.
1 parent 0cd8232 commit 6b98134

File tree

3 files changed

+17
-37
lines changed

3 files changed

+17
-37
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2472,7 +2472,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
24722472
unsigned VectorRegWidth) {
24732473
using namespace llvm::VPlanPatternMatch;
24742474
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
2475-
if (VF.isScalable() || !VectorLoop || Plan.getUF() != 1)
2475+
if (VF.isScalable() || !VectorLoop)
24762476
return;
24772477

24782478
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
@@ -2599,5 +2599,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
25992599
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
26002600
Inc->setOperand(1, Plan.getOrAddLiveIn(ConstantInt::get(
26012601
CanIV->getScalarType(), 1 * Plan.getUF())));
2602+
Plan.getVF().replaceAllUsesWith(
2603+
Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
26022604
removeDeadRecipes(Plan);
26032605
}

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -92,36 +92,22 @@ define void @test_complex_add_double(ptr %res, ptr noalias %A, ptr noalias %B, i
9292
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
9393
; CHECK: [[VECTOR_BODY]]:
9494
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
95-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
95+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
9696
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[INDEX]]
9797
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[TMP1]]
9898
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[INDEX]]
9999
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[TMP1]]
100-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
101-
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
102-
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
103-
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP3]], align 4
104-
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
105-
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
106-
; CHECK-NEXT: [[WIDE_VEC5:%.*]] = load <4 x double>, ptr [[TMP4]], align 4
107-
; CHECK-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <4 x double> [[WIDE_VEC5]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
108-
; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <4 x double> [[WIDE_VEC5]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
109-
; CHECK-NEXT: [[WIDE_VEC8:%.*]] = load <4 x double>, ptr [[TMP5]], align 4
110-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = shufflevector <4 x double> [[WIDE_VEC8]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
111-
; CHECK-NEXT: [[STRIDED_VEC11:%.*]] = shufflevector <4 x double> [[WIDE_VEC8]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
112-
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[STRIDED_VEC4]], [[STRIDED_VEC10]]
113-
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
100+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x double>, ptr [[TMP2]], align 4
101+
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = load <2 x double>, ptr [[TMP3]], align 4
102+
; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = load <2 x double>, ptr [[TMP4]], align 4
103+
; CHECK-NEXT: [[STRIDED_VEC11:%.*]] = load <2 x double>, ptr [[TMP5]], align 4
114104
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[STRIDED_VEC1]], [[STRIDED_VEC7]]
115105
; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[STRIDED_VEC5]], [[STRIDED_VEC11]]
116106
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[INDEX]]
117107
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP1]]
118-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
119-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
120-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4
121-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
122-
; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
123-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC11]], ptr [[TMP11]], align 4
124-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
108+
; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 4
109+
; CHECK-NEXT: store <2 x double> [[TMP15]], ptr [[TMP11]], align 4
110+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
125111
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
126112
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
127113
; CHECK: [[MIDDLE_BLOCK]]:

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,16 @@ define void @load_store_interleave_group(ptr noalias %data) {
1313
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1414
; CHECK: [[VECTOR_BODY]]:
1515
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
16-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
16+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1717
; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDEX]], 1
1818
; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP1]], 1
1919
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP2]]
2020
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]]
21-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
22-
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
23-
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
24-
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
25-
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x i64> [[WIDE_VEC2]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
26-
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x i64> [[WIDE_VEC2]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
27-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
28-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
29-
; CHECK-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8
30-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC3]], <2 x i64> [[STRIDED_VEC4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
31-
; CHECK-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
32-
; CHECK-NEXT: store <4 x i64> [[INTERLEAVED_VEC5]], ptr [[TMP5]], align 8
33-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
21+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
22+
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
23+
; CHECK-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP4]], align 8
24+
; CHECK-NEXT: store <2 x i64> [[WIDE_LOAD1]], ptr [[TMP5]], align 8
25+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3426
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
3527
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3628
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)