Skip to content

Commit 711a37e

Browse files
committed
VPlan: implement VPlan-level constant-folding
Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan, and use it in VPBuilder to constant-fold when all the underlying IR values passed into the API are constants.
1 parent a399f06 commit 711a37e

12 files changed

+406
-154
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/ADT/TypeSwitch.h"
2929
#include "llvm/Analysis/IVDescriptors.h"
3030
#include "llvm/Analysis/LoopInfo.h"
31+
#include "llvm/Analysis/TargetFolder.h"
3132
#include "llvm/Analysis/VectorUtils.h"
3233
#include "llvm/IR/Intrinsics.h"
3334
#include "llvm/IR/PatternMatch.h"
@@ -938,10 +939,87 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
938939
}
939940
}
940941

942+
/// Try to fold \p R using TargetFolder to a constant. Will succeed for a
943+
/// handled \p Opcode if all \p Operands are constant.
///
/// Returns nullptr if any operand is not a live-in with an underlying IR
/// value, or if \p Opcode is not one of the opcodes handled below.
///
/// \param R        Recipe being folded; queried for the cast result type, the
///                 compare predicate and the GEP no-wrap flags.
/// \param Opcode   Opcode of \p R (an IR opcode or a VPInstruction opcode).
/// \param Operands Operands of \p R, in order.
/// \param DL       DataLayout used to construct the TargetFolder.
/// \param TypeInfo Used to infer the scalar result type for casts and to
///                 obtain the context for the i8 type used by PtrAdd.
944+
static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode,
945+
ArrayRef<VPValue *> Operands,
946+
const DataLayout &DL,
947+
VPTypeAnalysis &TypeInfo) {
948+
SmallVector<Value *, 4> Ops;
949+
// Collect the underlying IR value of every operand, bailing out on the first
// operand that is not a live-in or has no underlying IR value.
for (VPValue *Op : Operands) {
950+
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
951+
return nullptr;
952+
Ops.push_back(Op->getLiveInIRValue());
953+
}
954+
955+
TargetFolder Folder(DL);
956+
// Binary operators and casts are identified by opcode class rather than by
// individual case labels.
if (Instruction::isBinaryOp(Opcode))
957+
return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
958+
Ops[1]);
959+
if (Instruction::isCast(Opcode))
960+
return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
961+
TypeInfo.inferScalarType(R.getVPSingleValue()));
962+
switch (Opcode) {
963+
// LogicalAnd is folded as select(Ops[0], Ops[1], <null value>).
case VPInstruction::LogicalAnd:
964+
return Folder.FoldSelect(Ops[0], Ops[1],
965+
ConstantInt::getNullValue(Ops[1]->getType()));
966+
// Not is folded as an xor with the all-ones value of the operand's type.
case VPInstruction::Not:
967+
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
968+
Constant::getAllOnesValue(Ops[0]->getType()));
969+
case Instruction::Select:
970+
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
971+
case Instruction::ICmp:
972+
case Instruction::FCmp:
973+
return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
974+
Ops[1]);
975+
case Instruction::GetElementPtr: {
976+
auto &RFlags = cast<VPRecipeWithIRFlags>(R);
977+
// The source element type comes from the underlying IR instruction; the
// cast<> asserts that it is a GetElementPtrInst.
auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
978+
return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
979+
RFlags.getGEPNoWrapFlags());
980+
}
981+
// PtrAdd is folded as a GEP whose source element type is i8, with Ops[1] as
// the single index.
case VPInstruction::PtrAdd:
982+
return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
983+
Ops[1],
984+
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
985+
case Instruction::InsertElement:
986+
return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
987+
case Instruction::ExtractElement:
988+
return Folder.FoldExtractElement(Ops[0], Ops[1]);
989+
}
990+
// Opcode is not handled by any of the cases above.
return nullptr;
991+
}
992+
941993
/// Try to simplify recipe \p R.
942994
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
943995
using namespace llvm::VPlanPatternMatch;
944996

997+
// Constant folding.
998+
if (TypeSwitch<VPRecipeBase *, bool>(&R)
999+
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
1000+
VPReplicateRecipe>([&](auto *I) {
1001+
VPlan *Plan = R.getParent()->getPlan();
1002+
const DataLayout &DL =
1003+
Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
1004+
Value *V = tryToConstantFold(*I, I->getOpcode(), I->operands(), DL,
1005+
TypeInfo);
1006+
if (V)
1007+
I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
1008+
return V;
1009+
})
1010+
.Default([](auto *) { return false; }))
1011+
return;
1012+
1013+
// Fold PredPHI constant -> constant.
1014+
if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
1015+
VPlan *Plan = R.getParent()->getPlan();
1016+
VPValue *Op = PredPHI->getOperand(0);
1017+
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
1018+
return;
1019+
if (auto *C = dyn_cast<Constant>(Op->getLiveInIRValue()))
1020+
PredPHI->replaceAllUsesWith(Plan->getOrAddLiveIn(C));
1021+
}
1022+
9451023
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
9461024
// part 0 can be replaced by their start value, if only the first lane is
9471025
// demanded.

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) {
1313
; VF2: [[VECTOR_PH]]:
1414
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
1515
; VF2: [[VECTOR_BODY]]:
16-
; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 0, 1
17-
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
16+
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 0
1817
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
1918
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
2019
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>

llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll

Lines changed: 8 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,10 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
4747
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4848
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4949
; CHECK: [[MIDDLE_BLOCK]]:
50-
; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
51-
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
52-
; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
53-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
5450
; CHECK-NEXT: br label %[[SCALAR_PH]]
5551
; CHECK: [[SCALAR_PH]]:
5652
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
57-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
53+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
5854
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
5955
; CHECK: [[LOOP_HEADER]]:
6056
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -141,14 +137,10 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
141137
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
142138
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
143139
; CHECK: [[MIDDLE_BLOCK]]:
144-
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
145-
; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
146-
; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
147-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
148140
; CHECK-NEXT: br label %[[SCALAR_PH]]
149141
; CHECK: [[SCALAR_PH]]:
150142
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
151-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
143+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
152144
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
153145
; CHECK: [[LOOP_HEADER]]:
154146
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -235,14 +227,10 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
235227
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
236228
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
237229
; CHECK: [[MIDDLE_BLOCK]]:
238-
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
239-
; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
240-
; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
241-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
242230
; CHECK-NEXT: br label %[[SCALAR_PH]]
243231
; CHECK: [[SCALAR_PH]]:
244232
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
245-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
233+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
246234
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
247235
; CHECK: [[LOOP_HEADER]]:
248236
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -339,14 +327,10 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
339327
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
340328
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
341329
; CHECK: [[MIDDLE_BLOCK]]:
342-
; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
343-
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
344-
; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
345-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
346330
; CHECK-NEXT: br label %[[SCALAR_PH]]
347331
; CHECK: [[SCALAR_PH]]:
348332
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
349-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
333+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
350334
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
351335
; CHECK: [[LOOP_HEADER]]:
352336
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -445,14 +429,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
445429
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
446430
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
447431
; CHECK: [[MIDDLE_BLOCK]]:
448-
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
449-
; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
450-
; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
451-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
452432
; CHECK-NEXT: br label %[[SCALAR_PH]]
453433
; CHECK: [[SCALAR_PH]]:
454434
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
455-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
435+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
456436
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
457437
; CHECK: [[LOOP_HEADER]]:
458438
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -559,14 +539,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
559539
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
560540
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
561541
; CHECK: [[MIDDLE_BLOCK]]:
562-
; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
563-
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
564-
; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
565-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
566542
; CHECK-NEXT: br label %[[SCALAR_PH]]
567543
; CHECK: [[SCALAR_PH]]:
568544
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
569-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
545+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
570546
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
571547
; CHECK: [[LOOP_HEADER]]:
572548
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -663,15 +639,11 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 {
663639
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
664640
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
665641
; CHECK: [[MIDDLE_BLOCK]]:
666-
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
667-
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 8
668-
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
669-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> zeroinitializer, i32 [[TMP16]]
670642
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
671643
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
672644
; CHECK: [[SCALAR_PH]]:
673645
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
674-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
646+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
675647
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
676648
; CHECK: [[LOOP_HEADER]]:
677649
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -751,15 +723,11 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 {
751723
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
752724
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
753725
; CHECK: [[MIDDLE_BLOCK]]:
754-
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
755-
; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 8
756-
; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1
757-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> zeroinitializer, i32 [[TMP17]]
758726
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
759727
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
760728
; CHECK: [[SCALAR_PH]]:
761729
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
762-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
730+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
763731
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
764732
; CHECK: [[LOOP_HEADER]]:
765733
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,6 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
165165
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
166166
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32()
167167
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP10]], 4
168-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[X]], i64 0
169-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
170-
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 4 x i8> [[BROADCAST_SPLAT]] to <vscale x 4 x i1>
171-
; CHECK-NEXT: [[TMP8:%.*]] = or <vscale x 4 x i1> splat (i1 true), [[TMP7]]
172168
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
173169
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
174170
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -181,7 +177,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
181177
; CHECK-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i32> [[BROADCAST_SPLAT4]], [[TMP14]]
182178
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i32> [[VEC_IV]], i32 0
183179
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[TMP15]], i32 9)
184-
; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> zeroinitializer
180+
; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> zeroinitializer
185181
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> [[BROADCAST_SPLAT2]], i32 1, <vscale x 4 x i1> [[TMP11]])
186182
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
187183
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]

0 commit comments

Comments
 (0)