Skip to content

Commit 5bff837

Browse files
committed
Update to pass assertion comparing two cost models
1 parent a21d8c2 commit 5bff837

File tree

3 files changed

+89
-87
lines changed

3 files changed

+89
-87
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5544,14 +5544,28 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
55445544
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
55455545
InstructionCost Cost;
55465546

5547+
// If the loop gets fully unrolled with the given VF, ignore the costs of
5548+
// comparison and induction instructions, as they'll get simplified away.
5549+
SmallPtrSet<const Value *, 16> ValuesToIgnoreForVF;
5550+
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
5551+
auto *Cmp = TheLoop->getLatchCmpInst();
5552+
if (Cmp && TC == VF.getKnownMinValue()) {
5553+
ValuesToIgnoreForVF.insert(Cmp);
5554+
for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
5555+
Instruction *IVInc = cast<Instruction>(
5556+
IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
5557+
ValuesToIgnoreForVF.insert(IVInc);
5558+
}
5559+
}
5560+
55475561
// For each block.
55485562
for (BasicBlock *BB : TheLoop->blocks()) {
55495563
InstructionCost BlockCost;
55505564

55515565
// For each instruction in the old loop.
55525566
for (Instruction &I : BB->instructionsWithoutDebug()) {
55535567
// Skip ignored values.
5554-
if (ValuesToIgnore.count(&I) ||
5568+
if (ValuesToIgnore.count(&I) || ValuesToIgnoreForVF.count(&I) ||
55555569
(VF.isVector() && VecValuesToIgnore.count(&I)))
55565570
continue;
55575571

@@ -7222,22 +7236,16 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
72227236
IVInsts.push_back(CI);
72237237
}
72247238

7225-
// If the given VF loop gets fully unrolled, ignore the costs of comparison
7226-
// and increment instruction, as they'll get simplified away
7239+
// If the loop gets fully unrolled with the given VF, ignore the costs of
7240+
// comparison and induction instructions, as they'll get simplified away.
72277241
auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
72287242
auto *Cmp = OrigLoop->getLatchCmpInst();
7229-
if (Cmp && VF.isFixed() && VF.getFixedValue() == TC) {
7243+
if (Cmp && TC == VF.getKnownMinValue()) {
72307244
CostCtx.SkipCostComputation.insert(Cmp);
7231-
for (Instruction *IVInst : IVInsts) {
7232-
bool IsSimplifiedAway = true;
7233-
for (auto *UIV : IVInst->users()) {
7234-
if (!Legal->isInductionVariable(UIV) && UIV != Cmp) {
7235-
IsSimplifiedAway = false;
7236-
break;
7237-
}
7238-
}
7239-
if (IsSimplifiedAway)
7240-
CostCtx.SkipCostComputation.insert(IVInst);
7245+
for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
7246+
Instruction *IVInc = cast<Instruction>(
7247+
IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
7248+
CostCtx.SkipCostComputation.insert(IVInc);
72417249
}
72427250
}
72437251

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

Lines changed: 61 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -49,27 +49,27 @@ define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
4949
; CHECK: vector.ph:
5050
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
5151
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
52-
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
53-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP4]]
52+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
53+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP2]]
5454
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
5555
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
56-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
57-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
56+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
57+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
5858
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
5959
; CHECK: vector.body:
6060
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
61-
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
62-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 3)
63-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]]
64-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
65-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP9]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
66-
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
67-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]]
68-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
69-
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
70-
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
71-
; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP13]], ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
72-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
61+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
62+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 3)
63+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP5]]
64+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
65+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP7]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
66+
; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
67+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP5]]
68+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
69+
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP10]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
70+
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i8> [[TMP8]], [[WIDE_MASKED_LOAD1]]
71+
; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP11]], ptr [[TMP10]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
72+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
7373
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
7474
; CHECK: middle.block:
7575
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -79,11 +79,11 @@ define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
7979
; CHECK: for.body:
8080
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
8181
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
82-
; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
83-
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP14]], 1
82+
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
83+
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP12]], 1
8484
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
85-
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
86-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP15]]
85+
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
86+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP13]]
8787
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
8888
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
8989
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 3
@@ -118,27 +118,27 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
118118
; CHECK: vector.ph:
119119
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
120120
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
121-
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
122-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP4]]
121+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
122+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP2]]
123123
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
124124
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
125-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
126-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
125+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
126+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
127127
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
128128
; CHECK: vector.body:
129129
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
130-
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
131-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP7]], i64 5)
132-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]]
133-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
134-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
135-
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i64 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer)
136-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]]
137-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
138-
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
139-
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
140-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
141-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
130+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
131+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP5]], i64 5)
132+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP5]]
133+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
134+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP7]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
135+
; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i64 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer)
136+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP5]]
137+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
138+
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP10]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
139+
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i8> [[TMP8]], [[WIDE_MASKED_LOAD1]]
140+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP11]], ptr [[TMP10]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
141+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
142142
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
143143
; CHECK: middle.block:
144144
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -148,11 +148,11 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
148148
; CHECK: for.body:
149149
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
150150
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
151-
; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
152-
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP14]], 1
151+
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
152+
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP12]], 1
153153
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
154-
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
155-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP15]]
154+
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
155+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP13]]
156156
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
157157
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
158158
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
@@ -186,28 +186,28 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
186186
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
187187
; CHECK: vector.ph:
188188
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
189-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
190-
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
191-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
189+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
190+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
191+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP2]]
192192
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
193193
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
194-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
195-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
194+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
195+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
196196
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
197197
; CHECK: vector.body:
198198
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
199-
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
200-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP7]], i64 8)
201-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]]
202-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
203-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
204-
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i64 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer)
205-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]]
206-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
207-
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
208-
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
209-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
210-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
199+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
200+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP5]], i64 8)
201+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP5]]
202+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
203+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP7]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i8> poison)
204+
; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 8 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
205+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP5]]
206+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
207+
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP10]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i8> poison)
208+
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i8> [[TMP8]], [[WIDE_MASKED_LOAD1]]
209+
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr [[TMP10]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
210+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
211211
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
212212
; CHECK: middle.block:
213213
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -217,11 +217,11 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
217217
; CHECK: for.body:
218218
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
219219
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
220-
; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
221-
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP14]], 1
220+
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
221+
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP12]], 1
222222
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
223-
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
224-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP15]]
223+
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
224+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP13]]
225225
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
226226
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
227227
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8

0 commit comments

Comments
 (0)