Skip to content

Commit 6187a56

Browse files
committed
VPlan: use Worklist in simplifyRecipes
Since simplifyRecipe creates new recipes in some cases, use a Worklist in its caller to capture the newly-created recipes and add them to the Worklist as candidates for further simplification. This patch thoroughly rewrites simplifyRecipe to simplify matched patterns, call eraseFromParent when applicable, and simplify the logic.
1 parent 55d2fff commit 6187a56

File tree

3 files changed

+149
-110
lines changed

3 files changed

+149
-110
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
7070
if (!CI)
7171
return false;
7272

73-
assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
74-
"Trying the match constant with unexpected bitwidth.");
73+
if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
74+
return false;
75+
7576
return APInt::isSameValue(CI->getValue(), Val);
7677
}
7778
};
@@ -82,6 +83,8 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
8283

8384
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
8485

86+
inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
87+
8588
/// Matching combinators
8689
template <typename LTy, typename RTy> struct match_combine_or {
8790
LTy L;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 107 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -966,79 +966,133 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
966966
}
967967
}
968968

969-
/// Try to simplify recipe \p R.
970-
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
969+
/// Try to simplify recipe \p R. Returns candidates for further simplification.
970+
static SmallVector<VPRecipeBase *>
971+
simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
971972
using namespace llvm::VPlanPatternMatch;
972973
// Try to remove redundant blend recipes.
973-
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
974+
if (auto *Blend = dyn_cast<VPBlendRecipe>(R)) {
974975
VPValue *Inc0 = Blend->getIncomingValue(0);
975976
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
976977
if (Inc0 != Blend->getIncomingValue(I) &&
977978
!match(Blend->getMask(I), m_False()))
978-
return;
979+
return {};
979980
Blend->replaceAllUsesWith(Inc0);
980981
Blend->eraseFromParent();
981-
return;
982+
return {};
982983
}
983984

984-
VPValue *A;
985-
if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
986-
VPValue *Trunc = R.getVPSingleValue();
985+
VPValue *X, *X1, *Y, *Z;
986+
if (match(R, m_Trunc(m_ZExtOrSExt(m_VPValue(X))))) {
987+
VPValue *Trunc = R->getVPSingleValue();
987988
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
988-
Type *ATy = TypeInfo.inferScalarType(A);
989-
if (TruncTy == ATy) {
990-
Trunc->replaceAllUsesWith(A);
989+
Type *XTy = TypeInfo.inferScalarType(X);
990+
VPWidenCastRecipe *VPC = nullptr;
991+
if (TruncTy == XTy) {
992+
Trunc->replaceAllUsesWith(X);
991993
} else {
992994
// Don't replace a scalarizing recipe with a widened cast.
993-
if (isa<VPReplicateRecipe>(&R))
994-
return;
995-
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
996-
997-
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
998-
? Instruction::SExt
999-
: Instruction::ZExt;
1000-
auto *VPC =
1001-
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
1002-
VPC->insertBefore(&R);
1003-
Trunc->replaceAllUsesWith(VPC);
1004-
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
1005-
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
1006-
VPC->insertBefore(&R);
1007-
Trunc->replaceAllUsesWith(VPC);
1008-
}
995+
if (isa<VPReplicateRecipe>(R))
996+
return {};
997+
998+
unsigned ExtOpcode = match(R->getOperand(0), m_SExt(m_VPValue()))
999+
? Instruction::SExt
1000+
: Instruction::ZExt;
1001+
VPC = XTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()
1002+
? new VPWidenCastRecipe(Instruction::Trunc, X, TruncTy)
1003+
: new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), X,
1004+
TruncTy);
1005+
VPC->insertBefore(R);
1006+
Trunc->replaceAllUsesWith(VPC);
10091007
}
10101008
#ifndef NDEBUG
10111009
// Verify that the cached type info for both A and its users is still
10121010
// accurate by comparing it to freshly computed types.
10131011
VPTypeAnalysis TypeInfo2(
1014-
R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
1012+
R->getParent()->getPlan()->getCanonicalIV()->getScalarType(),
10151013
TypeInfo.getContext());
1016-
assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
1017-
for (VPUser *U : A->users()) {
1014+
assert(TypeInfo.inferScalarType(X) == TypeInfo2.inferScalarType(X));
1015+
for (VPUser *U : X->users()) {
10181016
auto *R = dyn_cast<VPRecipeBase>(U);
10191017
if (!R)
10201018
continue;
10211019
for (VPValue *VPV : R->definedValues())
10221020
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
10231021
}
10241022
#endif
1023+
if (VPC)
1024+
return {VPC};
1025+
return {};
10251026
}
10261027

1027-
// Simplify (X && Y) || (X && !Y) -> X.
1028-
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
1029-
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
1030-
// recipes to be visited during simplification.
1031-
VPValue *X, *Y, *X1, *Y1;
1032-
if (match(&R,
1033-
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1034-
m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
1035-
X == X1 && Y == Y1) {
1036-
R.getVPSingleValue()->replaceAllUsesWith(X);
1037-
return;
1028+
// (X || !X) -> true.
1029+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
1030+
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
1031+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
1032+
R->eraseFromParent();
1033+
return {};
1034+
}
1035+
1036+
// (X || true) -> true.
1037+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
1038+
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
1039+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
1040+
R->eraseFromParent();
1041+
return {};
1042+
}
1043+
1044+
// (X || false) -> X.
1045+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
1046+
R->getVPSingleValue()->replaceAllUsesWith(X);
1047+
R->eraseFromParent();
1048+
return {};
1049+
}
1050+
1051+
// (X && !X) -> false.
1052+
if (match(R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
1053+
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
1054+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
1055+
R->eraseFromParent();
1056+
return {};
1057+
}
1058+
1059+
// (X && true) -> X.
1060+
if (match(R, m_LogicalAnd(m_VPValue(X), m_True()))) {
1061+
R->getVPSingleValue()->replaceAllUsesWith(X);
1062+
R->eraseFromParent();
1063+
return {};
1064+
}
1065+
1066+
// (X && false) -> false.
1067+
if (match(R, m_LogicalAnd(m_VPValue(X), m_False()))) {
1068+
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
1069+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
1070+
R->eraseFromParent();
1071+
return {};
1072+
}
1073+
1074+
// (X * 1) -> X.
1075+
if (match(R, m_c_Mul(m_VPValue(X), m_SpecificInt(1)))) {
1076+
R->getVPSingleValue()->replaceAllUsesWith(X);
1077+
R->eraseFromParent();
1078+
return {};
1079+
}
1080+
1081+
// (X && Y) || (X && Z) -> X && (Y || Z).
1082+
if (match(R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1083+
m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
1084+
X == X1) {
1085+
auto *YorZ = new VPInstruction(Instruction::Or, {Y, Z}, R->getDebugLoc());
1086+
YorZ->insertBefore(R);
1087+
auto *VPI = new VPInstruction(VPInstruction::LogicalAnd, {X, YorZ},
1088+
R->getDebugLoc());
1089+
VPI->insertBefore(R);
1090+
R->getVPSingleValue()->replaceAllUsesWith(VPI);
1091+
R->eraseFromParent();
1092+
return {VPI, YorZ};
10381093
}
10391094

1040-
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
1041-
return R.getVPSingleValue()->replaceAllUsesWith(A);
1095+
return {};
10421096
}
10431097

10441098
/// Try to simplify the recipes in \p Plan.
@@ -1047,8 +1101,16 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
10471101
Plan.getEntry());
10481102
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
10491103
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1050-
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1051-
simplifyRecipe(R, TypeInfo);
1104+
// Populate a Worklist, as simplifyRecipe might return a new recipe that we
1105+
// need to re-process.
1106+
SmallVector<VPRecipeBase *> Worklist;
1107+
for (auto &R : VPBB->getRecipeList())
1108+
Worklist.push_back(&R);
1109+
1110+
while (!Worklist.empty()) {
1111+
VPRecipeBase *R = Worklist.pop_back_val();
1112+
for (VPRecipeBase *Cand : simplifyRecipe(R, TypeInfo, Ctx))
1113+
Worklist.push_back(Cand);
10521114
}
10531115
}
10541116
}

llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll

Lines changed: 37 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,135 +7,109 @@ define void @test(ptr %p, i40 %a) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
10-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
11-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
1210
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1311
; CHECK: vector.body:
1412
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
1513
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
1614
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1715
; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1816
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
19-
; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], <i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24>
20-
; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], <i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28>
21-
; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
22-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
23-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer
24-
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
25-
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
26-
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
2717
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
2818
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
2919
; CHECK: pred.store.if:
30-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
31-
; CHECK-NEXT: store i1 [[TMP10]], ptr [[P]], align 1
20+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
3221
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
3322
; CHECK: pred.store.continue:
3423
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
3524
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
36-
; CHECK: pred.store.if3:
37-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
38-
; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1
25+
; CHECK: pred.store.if1:
26+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
3927
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
28+
; CHECK: pred.store.continue2:
29+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
30+
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
31+
; CHECK: pred.store.if3:
32+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
33+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
4034
; CHECK: pred.store.continue4:
41-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
35+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
4236
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
4337
; CHECK: pred.store.if5:
44-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
45-
; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1
38+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
4639
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
4740
; CHECK: pred.store.continue6:
48-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
41+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
4942
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
5043
; CHECK: pred.store.if7:
51-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
52-
; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1
44+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
5345
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
5446
; CHECK: pred.store.continue8:
55-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
47+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
5648
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
5749
; CHECK: pred.store.if9:
58-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
59-
; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1
50+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
6051
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
6152
; CHECK: pred.store.continue10:
62-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
53+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
6354
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
6455
; CHECK: pred.store.if11:
65-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
66-
; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1
56+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
6757
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
6858
; CHECK: pred.store.continue12:
69-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
59+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
7060
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
7161
; CHECK: pred.store.if13:
72-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
73-
; CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1
62+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
7463
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
7564
; CHECK: pred.store.continue14:
76-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
65+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
7766
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
7867
; CHECK: pred.store.if15:
79-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
80-
; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1
68+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
8169
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
8270
; CHECK: pred.store.continue16:
83-
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
71+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
8472
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
8573
; CHECK: pred.store.if17:
86-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
87-
; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1
74+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
8875
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
8976
; CHECK: pred.store.continue18:
90-
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
77+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
9178
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
9279
; CHECK: pred.store.if19:
93-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
94-
; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1
80+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
9581
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
9682
; CHECK: pred.store.continue20:
97-
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
83+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
9884
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
9985
; CHECK: pred.store.if21:
100-
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
101-
; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1
86+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
10287
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
10388
; CHECK: pred.store.continue22:
104-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
89+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
10590
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
10691
; CHECK: pred.store.if23:
107-
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
108-
; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1
92+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
10993
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
11094
; CHECK: pred.store.continue24:
111-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
95+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
11296
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
11397
; CHECK: pred.store.if25:
114-
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
115-
; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1
98+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
11699
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
117100
; CHECK: pred.store.continue26:
118-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
101+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
119102
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
120103
; CHECK: pred.store.if27:
121-
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
122-
; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1
104+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
123105
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
124106
; CHECK: pred.store.continue28:
125-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
126-
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
107+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
108+
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE32]]
127109
; CHECK: pred.store.if29:
128-
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
129-
; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1
130-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
131-
; CHECK: pred.store.continue30:
132-
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
133-
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
134-
; CHECK: pred.store.if31:
135-
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
136-
; CHECK-NEXT: store i1 [[TMP40]], ptr [[P]], align 1
110+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
137111
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
138-
; CHECK: pred.store.continue32:
112+
; CHECK: pred.store.continue30:
139113
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
140114
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
141115
; CHECK: middle.block:

0 commit comments

Comments
 (0)