Skip to content

Commit 5d79394

Browse files
committed
Fixups
1 parent 47b1b6b commit 5d79394

File tree

4 files changed

+117
-96
lines changed

4 files changed

+117
-96
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8715,9 +8715,10 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
87158715
case Instruction::ExtractValue: {
87168716
SmallVector<VPValue *> NewOps(Operands);
87178717
Type *I32Ty = IntegerType::getInt32Ty(I->getContext());
8718-
for (unsigned Idx : cast<ExtractValueInst>(I)->getIndices())
8719-
NewOps.push_back(
8720-
Plan.getOrAddLiveIn(ConstantInt::get(I32Ty, Idx, false)));
8718+
auto *EVI = cast<ExtractValueInst>(I);
8719+
assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index");
8720+
unsigned Idx = EVI->getIndices()[0];
8721+
NewOps.push_back(Plan.getOrAddLiveIn(ConstantInt::get(I32Ty, Idx, false)));
87218722
return new VPWidenRecipe(*I, make_range(NewOps.begin(), NewOps.end()));
87228723
}
87238724
};

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,8 +1515,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
15151515
Ctx.CostKind);
15161516
}
15171517
case Instruction::ExtractValue:
1518-
return Ctx.TTI.getInstructionCost(cast<Instruction>(getUnderlyingValue()),
1519-
TTI::TCK_RecipThroughput);
1518+
return 0;
15201519
case Instruction::ICmp:
15211520
case Instruction::FCmp: {
15221521
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
Lines changed: 81 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,30 @@
1-
; RUN: opt -passes=loop-vectorize,instcombine,simplifycfg < %s -S -o - | FileCheck %s --check-prefix=CHECK
2-
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|@)" --version 5
2+
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize < %s -S -o - 2>%t | FileCheck %s
3+
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
34
; REQUIRES: asserts
45

56
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
67
target triple = "aarch64--linux-gnu"
78

89
; CHECK-COST-LABEL: struct_return_widen
9-
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val) #0
10+
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val)
1011
; CHECK-COST: Cost of 10 for VF 2: WIDEN-CALL ir<%call> = call @foo(ir<%in_val>) (using library function: fixed_vec_foo)
1112
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
1213
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
1314

1415
define void @struct_return_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
1516
; CHECK-LABEL: define void @struct_return_widen(
1617
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
17-
; CHECK-NEXT: [[ENTRY:.*]]:
18-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
19-
; CHECK: [[VECTOR_BODY]]:
20-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
21-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[IN]], i64 [[INDEX]]
22-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
23-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 2
24-
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x half>, ptr [[TMP1]], align 2
25-
; CHECK-NEXT: [[TMP2:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD]])
26-
; CHECK-NEXT: [[TMP3:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD1]])
27-
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP2]], 0
28-
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP3]], 0
29-
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP2]], 1
30-
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP3]], 1
31-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds half, ptr [[OUT_A]], i64 [[INDEX]]
32-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 4
33-
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr [[TMP8]], align 2
34-
; CHECK-NEXT: store <2 x half> [[TMP5]], ptr [[TMP9]], align 2
35-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[OUT_B]], i64 [[INDEX]]
36-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP10]], i64 4
37-
; CHECK-NEXT: store <2 x half> [[TMP6]], ptr [[TMP10]], align 2
38-
; CHECK-NEXT: store <2 x half> [[TMP7]], ptr [[TMP11]], align 2
39-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
40-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
41-
; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
42-
; CHECK: [[EXIT]]:
43-
; CHECK-NEXT: ret void
18+
; CHECK: [[ENTRY:.*:]]
19+
; CHECK: [[VECTOR_PH:.*:]]
20+
; CHECK: [[VECTOR_BODY:.*:]]
21+
; CHECK: [[TMP2:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD:%.*]])
22+
; CHECK: [[TMP3:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD1:%.*]])
23+
; CHECK: [[MIDDLE_BLOCK:.*:]]
24+
; CHECK: [[SCALAR_PH:.*:]]
25+
; CHECK: [[FOR_BODY:.*:]]
26+
; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR2:[0-9]+]]
27+
; CHECK: [[EXIT:.*:]]
4428
;
4529
entry:
4630
br label %for.body
@@ -65,41 +49,24 @@ exit:
6549
}
6650

6751
; CHECK-COST-LABEL: struct_return_replicate
68-
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val) #0
52+
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val)
6953
; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
7054
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
7155
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
7256

7357
define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
7458
; CHECK-LABEL: define void @struct_return_replicate(
7559
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
76-
; CHECK-NEXT: [[ENTRY:.*]]:
77-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
78-
; CHECK: [[VECTOR_BODY]]:
79-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
80-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[IN]], i64 [[INDEX]]
81-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 2
82-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[WIDE_LOAD]], i64 0
83-
; CHECK-NEXT: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1]]) #[[ATTR0:[0-9]+]]
84-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x half> [[WIDE_LOAD]], i64 1
85-
; CHECK-NEXT: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3]]) #[[ATTR0]]
86-
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { half, half } [[TMP2]], 0
87-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[TMP5]], i64 0
88-
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { half, half } [[TMP2]], 1
89-
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x half> poison, half [[TMP7]], i64 0
90-
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { half, half } [[TMP4]], 0
91-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x half> [[TMP6]], half [[TMP9]], i64 1
92-
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { half, half } [[TMP4]], 1
93-
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x half> [[TMP8]], half [[TMP11]], i64 1
94-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[OUT_A]], i64 [[INDEX]]
95-
; CHECK-NEXT: store <2 x half> [[TMP10]], ptr [[TMP13]], align 2
96-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds half, ptr [[OUT_B]], i64 [[INDEX]]
97-
; CHECK-NEXT: store <2 x half> [[TMP12]], ptr [[TMP14]], align 2
98-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
99-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
100-
; CHECK-NEXT: br i1 [[TMP15]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
101-
; CHECK: [[EXIT]]:
102-
; CHECK-NEXT: ret void
60+
; CHECK: [[ENTRY:.*:]]
61+
; CHECK: [[VECTOR_PH:.*:]]
62+
; CHECK: [[VECTOR_BODY:.*:]]
63+
; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3:%.*]]) #[[ATTR3:[0-9]+]]
64+
; CHECK: [[TMP6:%.*]] = tail call { half, half } @foo(half [[TMP5:%.*]]) #[[ATTR3]]
65+
; CHECK: [[MIDDLE_BLOCK:.*:]]
66+
; CHECK: [[SCALAR_PH:.*:]]
67+
; CHECK: [[FOR_BODY:.*:]]
68+
; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR3]]
69+
; CHECK: [[EXIT:.*:]]
10370
;
10471
entry:
10572
br label %for.body
@@ -108,7 +75,7 @@ for.body:
10875
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
10976
%arrayidx = getelementptr inbounds half, ptr %in, i64 %iv
11077
%in_val = load half, ptr %arrayidx, align 2
111-
; #3 does not have a fixed-size vector mapping (so replication is used)
78+
; #1 does not have a fixed-size vector mapping (so replication is used)
11279
%call = tail call { half, half } @foo(half %in_val) #1
11380
%extract_a = extractvalue { half, half } %call, 0
11481
%extract_b = extractvalue { half, half } %call, 1
@@ -124,10 +91,64 @@ exit:
12491
ret void
12592
}
12693

94+
; CHECK-COST-LABEL: struct_return_scalable
95+
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val)
96+
; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
97+
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
98+
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
99+
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @foo(ir<%in_val>)
100+
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
101+
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
102+
; CHECK-COST: Cost of 10 for VF vscale x 8: WIDEN-CALL ir<%call> = call @foo(ir<%in_val>, ir<true>) (using library function: scalable_vec_masked_foo)
103+
104+
define void @struct_return_scalable(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) #2 {
105+
; CHECK-LABEL: define void @struct_return_scalable(
106+
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] {
107+
; CHECK: [[ENTRY:.*:]]
108+
; CHECK: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
109+
; CHECK: [[VECTOR_PH:.*:]]
110+
; CHECK: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
111+
; CHECK: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
112+
; CHECK: [[VECTOR_BODY:.*:]]
113+
; CHECK: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
114+
; CHECK: [[TMP12:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @scalable_vec_masked_foo(<vscale x 8 x half> [[WIDE_LOAD:%.*]], <vscale x 8 x i1> splat (i1 true))
115+
; CHECK: [[TMP13:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @scalable_vec_masked_foo(<vscale x 8 x half> [[WIDE_LOAD1:%.*]], <vscale x 8 x i1> splat (i1 true))
116+
; CHECK: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
117+
; CHECK: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
118+
; CHECK: [[MIDDLE_BLOCK:.*:]]
119+
; CHECK: [[SCALAR_PH:.*:]]
120+
; CHECK: [[FOR_BODY:.*:]]
121+
; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR3]]
122+
; CHECK: [[EXIT:.*:]]
123+
;
124+
entry:
125+
br label %for.body
126+
127+
for.body:
128+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
129+
%arrayidx = getelementptr inbounds half, ptr %in, i64 %iv
130+
%in_val = load half, ptr %arrayidx, align 2
131+
%call = tail call { half, half } @foo(half %in_val) #1
132+
%extract_a = extractvalue { half, half } %call, 0
133+
%extract_b = extractvalue { half, half } %call, 1
134+
%arrayidx2 = getelementptr inbounds half, ptr %out_a, i64 %iv
135+
store half %extract_a, ptr %arrayidx2, align 2
136+
%arrayidx4 = getelementptr inbounds half, ptr %out_b, i64 %iv
137+
store half %extract_b, ptr %arrayidx4, align 2
138+
%iv.next = add nuw nsw i64 %iv, 1
139+
%exitcond.not = icmp eq i64 %iv.next, 1024
140+
br i1 %exitcond.not, label %exit, label %for.body
141+
142+
exit:
143+
ret void
144+
}
145+
146+
127147
declare { half, half } @foo(half)
128148

129149
declare { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half>)
130-
declare { <vscale x 4 x half>, <vscale x 4 x half> } @scalable_vec_masked_foo(<vscale x 4 x half>, <vscale x 4 x i1>)
150+
declare { <vscale x 8 x half>, <vscale x 8 x half> } @scalable_vec_masked_foo(<vscale x 8 x half>, <vscale x 8 x i1>)
131151

132152
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_foo(fixed_vec_foo)" }
133153
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
154+
attributes #2 = { "target-features"="+sve" }

llvm/test/Transforms/LoopVectorize/vplan-widen-struct-return.ll

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
; REQUIRES: asserts
2-
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -S 2>&1 | FileCheck %s
2+
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -S 2>&1 | FileCheck %s
33

44
define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
55
; CHECK-LABEL: LV: Checking a loop in 'struct_return_f32_widen'
66
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
7-
; CHECK-NEXT: Live-in vp<%0> = VF * UF
8-
; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
7+
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
8+
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
99
; CHECK-NEXT: Live-in ir<1024> = original trip-count
1010
; CHECK-EMPTY:
1111
; CHECK-NEXT: ir-bb<entry>:
@@ -16,22 +16,22 @@ define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out
1616
; CHECK-EMPTY:
1717
; CHECK-NEXT: <x1> vector loop: {
1818
; CHECK-NEXT: vector.body:
19-
; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
20-
; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
21-
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%in>, vp<%3>
22-
; CHECK-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
23-
; CHECK-NEXT: WIDEN ir<%in_val> = load vp<%4>
19+
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
20+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
21+
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%in>, vp<[[STEPS]]>
22+
; CHECK-NEXT: vp<[[IN_VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
23+
; CHECK-NEXT: WIDEN ir<%in_val> = load vp<[[IN_VEC_PTR]]>
2424
; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%in_val>) (using library function: fixed_vec_foo)
2525
; CHECK-NEXT: WIDEN ir<%extract_a> = extractvalue ir<%call>, ir<0>
2626
; CHECK-NEXT: WIDEN ir<%extract_b> = extractvalue ir<%call>, ir<1>
27-
; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%out_a>, vp<%3>
28-
; CHECK-NEXT: vp<%5> = vector-pointer ir<%arrayidx2>
29-
; CHECK-NEXT: WIDEN store vp<%5>, ir<%extract_a>
30-
; CHECK-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%out_b>, vp<%3>
31-
; CHECK-NEXT: vp<%6> = vector-pointer ir<%arrayidx4>
32-
; CHECK-NEXT: WIDEN store vp<%6>, ir<%extract_b>
33-
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
34-
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
27+
; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%out_a>, vp<[[STEPS]]>
28+
; CHECK-NEXT: vp<[[OUT_A_VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx2>
29+
; CHECK-NEXT: WIDEN store vp<[[OUT_A_VEC_PTR]]>, ir<%extract_a>
30+
; CHECK-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%out_b>, vp<[[STEPS]]>
31+
; CHECK-NEXT: vp<[[OUT_B_VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx4>
32+
; CHECK-NEXT: WIDEN store vp<[[OUT_B_VEC_PTR]]>, ir<%extract_b>
33+
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
34+
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VTC]]>
3535
; CHECK-NEXT: No successors
3636
; CHECK-NEXT: }
3737
entry:
@@ -59,8 +59,8 @@ exit:
5959
define void @struct_return_f32_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
6060
; CHECK-LABEL: LV: Checking a loop in 'struct_return_f32_replicate'
6161
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
62-
; CHECK-NEXT: Live-in vp<%0> = VF * UF
63-
; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
62+
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
63+
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
6464
; CHECK-NEXT: Live-in ir<1024> = original trip-count
6565
; CHECK-EMPTY:
6666
; CHECK-NEXT: ir-bb<entry>:
@@ -71,22 +71,22 @@ define void @struct_return_f32_replicate(ptr noalias %in, ptr noalias writeonly
7171
; CHECK-EMPTY:
7272
; CHECK-NEXT: <x1> vector loop: {
7373
; CHECK-NEXT: vector.body:
74-
; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
75-
; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
76-
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%in>, vp<%3>
77-
; CHECK-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
78-
; CHECK-NEXT: WIDEN ir<%in_val> = load vp<%4>
74+
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
75+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
76+
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%in>, vp<[[STEPS]]>
77+
; CHECK-NEXT: vp<[[IN_VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
78+
; CHECK-NEXT: WIDEN ir<%in_val> = load vp<[[IN_VEC_PTR]]>
7979
; CHECK-NEXT: REPLICATE ir<%call> = call @foo(ir<%in_val>)
8080
; CHECK-NEXT: WIDEN ir<%extract_a> = extractvalue ir<%call>, ir<0>
8181
; CHECK-NEXT: WIDEN ir<%extract_b> = extractvalue ir<%call>, ir<1>
82-
; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%out_a>, vp<%3>
83-
; CHECK-NEXT: vp<%5> = vector-pointer ir<%arrayidx2>
84-
; CHECK-NEXT: WIDEN store vp<%5>, ir<%extract_a>
85-
; CHECK-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%out_b>, vp<%3>
86-
; CHECK-NEXT: vp<%6> = vector-pointer ir<%arrayidx4>
87-
; CHECK-NEXT: WIDEN store vp<%6>, ir<%extract_b>
88-
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
89-
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
82+
; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%out_a>, vp<[[STEPS]]>
83+
; CHECK-NEXT: vp<[[OUT_A_VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx2>
84+
; CHECK-NEXT: WIDEN store vp<[[OUT_A_VEC_PTR]]>, ir<%extract_a>
85+
; CHECK-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%out_b>, vp<[[STEPS]]>
86+
; CHECK-NEXT: vp<[[OUT_B_VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx4>
87+
; CHECK-NEXT: WIDEN store vp<[[OUT_B_VEC_PTR]]>, ir<%extract_b>
88+
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
89+
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VTC]]>
9090
; CHECK-NEXT: No successors
9191
; CHECK-NEXT: }
9292
entry:

0 commit comments

Comments
 (0)