Skip to content

Commit 19d0aec

Browse files
committed
[LV] Support generating masks for switch terminators.
Update createEdgeMask to create masks where the terminator in Src is a switch. We need to handle 2 separate cases: 1. Dst is not the default destination. Dst is reached if any of the cases with destination == Dst are taken. Join the conditions for each case where destination == Dst using a logical OR. 2. Dst is the default destination. Dst is reached if none of the cases with destination != Dst are taken. Join the conditions for each case where the destination is != Dst using a logical OR and negate it. Fixes #48188.
1 parent 05f986e commit 19d0aec

File tree

7 files changed

+920
-46
lines changed

7 files changed

+920
-46
lines changed

clang/test/Frontend/optimization-remark-analysis.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: %clang -O1 -fvectorize -target x86_64-unknown-unknown -emit-llvm -Rpass-analysis -S %s -o - 2>&1 | FileCheck %s --check-prefix=RPASS
22
// RUN: %clang -O1 -fvectorize -target x86_64-unknown-unknown -emit-llvm -S %s -o - 2>&1 | FileCheck %s
33

4-
// RPASS: {{.*}}:12:5: remark: loop not vectorized: loop contains a switch statement
4+
// RPASS-NOT: {{.*}}:12:5: remark: loop not vectorized
55
// CHECK-NOT: remark: loop not vectorized: loop contains a switch statement
66

77
double foo(int N, int *Array) {

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,11 +1348,11 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
13481348
// Collect the blocks that need predication.
13491349
for (BasicBlock *BB : TheLoop->blocks()) {
13501350
// We only support branch and switch terminators inside loops.
1351-
if (!isa<BranchInst>(BB->getTerminator())) {
1352-
reportVectorizationFailure("Loop contains a switch statement",
1353-
"loop contains a switch statement",
1354-
"LoopContainsSwitch", ORE, TheLoop,
1355-
BB->getTerminator());
1351+
if (!isa<BranchInst, SwitchInst>(BB->getTerminator())) {
1352+
reportVectorizationFailure("Loop contains an unsupported termaintor",
1353+
"loop contains an unsupported terminator",
1354+
"LoopContainsUnsupportedTerminator", ORE,
1355+
TheLoop, BB->getTerminator());
13561356
return false;
13571357
}
13581358

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7763,6 +7763,41 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
77637763

77647764
VPValue *SrcMask = getBlockInMask(Src);
77657765

7766+
if (auto *SI = dyn_cast<SwitchInst>(Src->getTerminator())) {
7767+
// Create mask where the terminator in Src is a switch. We need to handle 2
7768+
// separate cases:
7769+
// 1. Dst is not the default destination. Dst is reached if any of the cases
7770+
// with destination == Dst are taken. Join the conditions for each case
7771+
// where destination == Dst using a logical OR.
7772+
// 2. Dst is the default destination. Dst is reached if none of the cases
7773+
// with destination != Dst are taken. Join the conditions for each case
7774+
// where the destination is != Dst using a logical OR and negate it.
7775+
VPValue *Mask = nullptr;
7776+
VPValue *Cond = getVPValueOrAddLiveIn(SI->getCondition(), Plan);
7777+
bool IsDefault = SI->getDefaultDest() == Dst;
7778+
for (auto &C : SI->cases()) {
7779+
if (IsDefault) {
7780+
if (C.getCaseSuccessor() == Dst)
7781+
continue;
7782+
} else if (C.getCaseSuccessor() != Dst)
7783+
continue;
7784+
7785+
VPValue *Eq = EdgeMaskCache.lookup({Src, C.getCaseSuccessor()});
7786+
if (!Eq) {
7787+
VPValue *V = getVPValueOrAddLiveIn(C.getCaseValue(), Plan);
7788+
Eq = Builder.createICmp(CmpInst::ICMP_EQ, Cond, V);
7789+
}
7790+
if (Mask)
7791+
Mask = Builder.createOr(Mask, Eq);
7792+
else
7793+
Mask = Eq;
7794+
}
7795+
if (IsDefault)
7796+
Mask = Builder.createNot(Mask);
7797+
assert(Mask && "mask must be created");
7798+
return EdgeMaskCache[Edge] = Mask;
7799+
}
7800+
77667801
// The terminator has to be a branch inst!
77677802
BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
77687803
assert(BI && "Unexpected terminator found");

llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll

Lines changed: 623 additions & 24 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/LoopVectorize/no_switch.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,16 @@
22
; RUN: opt < %s -passes=loop-vectorize,transform-warning -force-vector-width=1 -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS
33
; RUN: opt < %s -passes=loop-vectorize,transform-warning -force-vector-width=4 -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO
44

5-
; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
6-
; CHECK: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
5+
; CHECK-NOT: loop not vectorized: loop contains a switch statement
6+
; CHECK-NOT: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
77

88
; NOANALYSIS-NOT: remark: {{.*}}
9-
; NOANALYSIS: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
9+
; NOANALYSIS: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
1010

11-
; MOREINFO: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
12-
; MOREINFO: remark: source.cpp:4:5: loop not vectorized (Force=true, Vector Width=4)
13-
; MOREINFO: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
11+
; MOREINFO-NOT: remark
1412

1513
; CHECK: _Z11test_switchPii
16-
; CHECK-NOT: x i32>
14+
; CHECK: vector.body:
1715
; CHECK: ret
1816

1917
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

llvm/test/Transforms/LoopVectorize/predicate-switch.ll

Lines changed: 196 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,76 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
66
; IC1-LABEL: define void @switch4_default_common_dest_with_case(
77
; IC1-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) {
88
; IC1-NEXT: [[ENTRY:.*]]:
9+
; IC1-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
10+
; IC1-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
11+
; IC1-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
12+
; IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
13+
; IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
14+
; IC1: [[VECTOR_PH]]:
15+
; IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
16+
; IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
17+
; IC1-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
18+
; IC1-NEXT: br label %[[VECTOR_BODY:.*]]
19+
; IC1: [[VECTOR_BODY]]:
20+
; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE13:.*]] ]
21+
; IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
22+
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
23+
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
24+
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
25+
; IC1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
26+
; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
27+
; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], <i8 13, i8 13>
28+
; IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
29+
; IC1-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
30+
; IC1: [[PRED_STORE_IF]]:
31+
; IC1-NEXT: store i8 0, ptr [[NEXT_GEP]], align 1
32+
; IC1-NEXT: br label %[[PRED_STORE_CONTINUE]]
33+
; IC1: [[PRED_STORE_CONTINUE]]:
34+
; IC1-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
35+
; IC1-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
36+
; IC1: [[PRED_STORE_IF4]]:
37+
; IC1-NEXT: store i8 0, ptr [[NEXT_GEP3]], align 1
38+
; IC1-NEXT: br label %[[PRED_STORE_CONTINUE5]]
39+
; IC1: [[PRED_STORE_CONTINUE5]]:
40+
; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], <i8 -12, i8 -12>
41+
; IC1-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
42+
; IC1-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
43+
; IC1: [[PRED_STORE_IF6]]:
44+
; IC1-NEXT: store i8 42, ptr [[NEXT_GEP]], align 1
45+
; IC1-NEXT: br label %[[PRED_STORE_CONTINUE7]]
46+
; IC1: [[PRED_STORE_CONTINUE7]]:
47+
; IC1-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
48+
; IC1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
49+
; IC1: [[PRED_STORE_IF8]]:
50+
; IC1-NEXT: store i8 42, ptr [[NEXT_GEP3]], align 1
51+
; IC1-NEXT: br label %[[PRED_STORE_CONTINUE9]]
52+
; IC1: [[PRED_STORE_CONTINUE9]]:
53+
; IC1-NEXT: [[TMP10:%.*]] = or <2 x i1> [[TMP7]], [[TMP4]]
54+
; IC1-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP10]], <i1 true, i1 true>
55+
; IC1-NEXT: [[TMP12:%.*]] = or <2 x i1> [[TMP11]], [[TMP11]]
56+
; IC1-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
57+
; IC1-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
58+
; IC1: [[PRED_STORE_IF10]]:
59+
; IC1-NEXT: store i8 2, ptr [[NEXT_GEP]], align 1
60+
; IC1-NEXT: br label %[[PRED_STORE_CONTINUE11]]
61+
; IC1: [[PRED_STORE_CONTINUE11]]:
62+
; IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
63+
; IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13]]
64+
; IC1: [[PRED_STORE_IF12]]:
65+
; IC1-NEXT: store i8 2, ptr [[NEXT_GEP3]], align 1
66+
; IC1-NEXT: br label %[[PRED_STORE_CONTINUE13]]
67+
; IC1: [[PRED_STORE_CONTINUE13]]:
68+
; IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
69+
; IC1-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
70+
; IC1-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
71+
; IC1: [[MIDDLE_BLOCK]]:
72+
; IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
73+
; IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
74+
; IC1: [[SCALAR_PH]]:
75+
; IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
976
; IC1-NEXT: br label %[[LOOP_HEADER:.*]]
1077
; IC1: [[LOOP_HEADER]]:
11-
; IC1-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
78+
; IC1-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1279
; IC1-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 1
1380
; IC1-NEXT: switch i8 [[L]], label %[[DEFAULT:.*]] [
1481
; IC1-NEXT: i8 -12, label %[[IF_THEN_1:.*]]
@@ -27,16 +94,130 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
2794
; IC1: [[LOOP_LATCH]]:
2895
; IC1-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
2996
; IC1-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
30-
; IC1-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
97+
; IC1-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
3198
; IC1: [[EXIT]]:
3299
; IC1-NEXT: ret void
33100
;
34101
; IC2-LABEL: define void @switch4_default_common_dest_with_case(
35102
; IC2-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) {
36103
; IC2-NEXT: [[ENTRY:.*]]:
104+
; IC2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
105+
; IC2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
106+
; IC2-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
107+
; IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
108+
; IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
109+
; IC2: [[VECTOR_PH]]:
110+
; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
111+
; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
112+
; IC2-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
113+
; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
114+
; IC2: [[VECTOR_BODY]]:
115+
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ]
116+
; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
117+
; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
118+
; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
119+
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
120+
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
121+
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
122+
; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
123+
; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
124+
; IC2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
125+
; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
126+
; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1
127+
; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
128+
; IC2-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], <i8 13, i8 13>
129+
; IC2-NEXT: [[TMP8:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], <i8 13, i8 13>
130+
; IC2-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
131+
; IC2-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
132+
; IC2: [[PRED_STORE_IF]]:
133+
; IC2-NEXT: store i8 0, ptr [[NEXT_GEP]], align 1
134+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
135+
; IC2: [[PRED_STORE_CONTINUE]]:
136+
; IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
137+
; IC2-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
138+
; IC2: [[PRED_STORE_IF7]]:
139+
; IC2-NEXT: store i8 0, ptr [[NEXT_GEP3]], align 1
140+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE8]]
141+
; IC2: [[PRED_STORE_CONTINUE8]]:
142+
; IC2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
143+
; IC2-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
144+
; IC2: [[PRED_STORE_IF9]]:
145+
; IC2-NEXT: store i8 0, ptr [[NEXT_GEP4]], align 1
146+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE10]]
147+
; IC2: [[PRED_STORE_CONTINUE10]]:
148+
; IC2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
149+
; IC2-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
150+
; IC2: [[PRED_STORE_IF11]]:
151+
; IC2-NEXT: store i8 0, ptr [[NEXT_GEP5]], align 1
152+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE12]]
153+
; IC2: [[PRED_STORE_CONTINUE12]]:
154+
; IC2-NEXT: [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], <i8 -12, i8 -12>
155+
; IC2-NEXT: [[TMP14:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], <i8 -12, i8 -12>
156+
; IC2-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
157+
; IC2-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
158+
; IC2: [[PRED_STORE_IF13]]:
159+
; IC2-NEXT: store i8 42, ptr [[NEXT_GEP]], align 1
160+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE14]]
161+
; IC2: [[PRED_STORE_CONTINUE14]]:
162+
; IC2-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
163+
; IC2-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
164+
; IC2: [[PRED_STORE_IF15]]:
165+
; IC2-NEXT: store i8 42, ptr [[NEXT_GEP3]], align 1
166+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE16]]
167+
; IC2: [[PRED_STORE_CONTINUE16]]:
168+
; IC2-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP14]], i32 0
169+
; IC2-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
170+
; IC2: [[PRED_STORE_IF17]]:
171+
; IC2-NEXT: store i8 42, ptr [[NEXT_GEP4]], align 1
172+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE18]]
173+
; IC2: [[PRED_STORE_CONTINUE18]]:
174+
; IC2-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP14]], i32 1
175+
; IC2-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
176+
; IC2: [[PRED_STORE_IF19]]:
177+
; IC2-NEXT: store i8 42, ptr [[NEXT_GEP5]], align 1
178+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE20]]
179+
; IC2: [[PRED_STORE_CONTINUE20]]:
180+
; IC2-NEXT: [[TMP19:%.*]] = or <2 x i1> [[TMP13]], [[TMP7]]
181+
; IC2-NEXT: [[TMP20:%.*]] = or <2 x i1> [[TMP14]], [[TMP8]]
182+
; IC2-NEXT: [[TMP21:%.*]] = xor <2 x i1> [[TMP19]], <i1 true, i1 true>
183+
; IC2-NEXT: [[TMP22:%.*]] = xor <2 x i1> [[TMP20]], <i1 true, i1 true>
184+
; IC2-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP21]], [[TMP21]]
185+
; IC2-NEXT: [[TMP24:%.*]] = or <2 x i1> [[TMP22]], [[TMP22]]
186+
; IC2-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP23]], i32 0
187+
; IC2-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
188+
; IC2: [[PRED_STORE_IF21]]:
189+
; IC2-NEXT: store i8 2, ptr [[NEXT_GEP]], align 1
190+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE22]]
191+
; IC2: [[PRED_STORE_CONTINUE22]]:
192+
; IC2-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1
193+
; IC2-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
194+
; IC2: [[PRED_STORE_IF23]]:
195+
; IC2-NEXT: store i8 2, ptr [[NEXT_GEP3]], align 1
196+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE24]]
197+
; IC2: [[PRED_STORE_CONTINUE24]]:
198+
; IC2-NEXT: [[TMP27:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0
199+
; IC2-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
200+
; IC2: [[PRED_STORE_IF25]]:
201+
; IC2-NEXT: store i8 2, ptr [[NEXT_GEP4]], align 1
202+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE26]]
203+
; IC2: [[PRED_STORE_CONTINUE26]]:
204+
; IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP24]], i32 1
205+
; IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]]
206+
; IC2: [[PRED_STORE_IF27]]:
207+
; IC2-NEXT: store i8 2, ptr [[NEXT_GEP5]], align 1
208+
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE28]]
209+
; IC2: [[PRED_STORE_CONTINUE28]]:
210+
; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
211+
; IC2-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
212+
; IC2-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
213+
; IC2: [[MIDDLE_BLOCK]]:
214+
; IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
215+
; IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
216+
; IC2: [[SCALAR_PH]]:
217+
; IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
37218
; IC2-NEXT: br label %[[LOOP_HEADER:.*]]
38219
; IC2: [[LOOP_HEADER]]:
39-
; IC2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
220+
; IC2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
40221
; IC2-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 1
41222
; IC2-NEXT: switch i8 [[L]], label %[[DEFAULT:.*]] [
42223
; IC2-NEXT: i8 -12, label %[[IF_THEN_1:.*]]
@@ -55,7 +236,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
55236
; IC2: [[LOOP_LATCH]]:
56237
; IC2-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
57238
; IC2-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
58-
; IC2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
239+
; IC2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
59240
; IC2: [[EXIT]]:
60241
; IC2-NEXT: ret void
61242
;
@@ -91,3 +272,14 @@ loop.latch:
91272
exit:
92273
ret void
93274
}
275+
;.
276+
; IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
277+
; IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
278+
; IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
279+
; IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
280+
;.
281+
; IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
282+
; IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
283+
; IC2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
284+
; IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
285+
;.

0 commit comments

Comments
 (0)