Commit e23ab73
[VPlan] Don't convert widen recipes to VP intrinsics in EVL transform (#127180)
This is a copy of #126177, since that PR was automatically and permanently closed after I messed up the source branch on my remote.

This patch proposes to avoid converting widening recipes to VP intrinsics during the EVL transform. IIUC we initially did this to avoid `vl` toggles on RISC-V. However, we now have the RISCVVLOptimizer pass, which makes this mostly redundant. Emitting regular IR instead of VP intrinsics allows more generic optimisations, both in the middle end and in DAGCombiner, and we generally have better patterns in the RISC-V backend for non-VP nodes. Sticking to regular IR instructions is likely much less work than reimplementing all of these optimisations for VP intrinsics, and on SPEC CPU 2017 we get noticeably better code generation.
1 parent cbd3801 · commit e23ab73

26 files changed: +616 −271 lines
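For context, here is a minimal IR-level sketch of the change (not taken from the patch; the value names and types are illustrative, and the shapes follow the test updates below):

```llvm
; Before: the EVL transform rewrote a widened add into a VP intrinsic
; carrying an all-true mask and the explicit vector length for the iteration.
%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
%x   = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %p, <vscale x 2 x i1> splat (i1 true), i32 %evl)
%add = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 true), i32 %evl)
call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %add, ptr %q, <vscale x 2 x i1> splat (i1 true), i32 %evl)

; After: the add stays a plain IR instruction. Lanes past the EVL are never
; stored, and on RISC-V the RISCVVLOptimizer pass can still shrink the add's
; vl based on its users.
%x2   = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %p, <vscale x 2 x i1> splat (i1 true), i32 %evl)
%add2 = add <vscale x 2 x i64> %x2, %x2
call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %add2, ptr %q, <vscale x 2 x i1> splat (i1 true), i32 %evl)
```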

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 52 deletions
```diff
@@ -523,7 +523,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenGEPSC:
     case VPRecipeBase::VPWidenIntrinsicSC:
     case VPRecipeBase::VPWidenSC:
-    case VPRecipeBase::VPWidenEVLSC:
     case VPRecipeBase::VPWidenSelectSC:
     case VPRecipeBase::VPBlendSC:
     case VPRecipeBase::VPPredInstPHISC:
@@ -710,7 +709,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
   static inline bool classof(const VPRecipeBase *R) {
     return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenSC ||
-           R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
@@ -1113,8 +1111,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
   }
 
   static inline bool classof(const VPRecipeBase *R) {
-    return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
-           R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
+    return R->getVPDefID() == VPRecipeBase::VPWidenSC;
   }
 
   static inline bool classof(const VPUser *U) {
@@ -1139,54 +1136,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
 #endif
 };
 
-/// A recipe for widening operations with vector-predication intrinsics with
-/// explicit vector length (EVL).
-class VPWidenEVLRecipe : public VPWidenRecipe {
-  using VPRecipeWithIRFlags::transferFlags;
-
-public:
-  template <typename IterT>
-  VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
-      : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
-    addOperand(&EVL);
-  }
-  VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
-      : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
-    transferFlags(W);
-  }
-
-  ~VPWidenEVLRecipe() override = default;
-
-  VPWidenRecipe *clone() override final {
-    llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
-    return nullptr;
-  }
-
-  VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
-
-  VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
-  const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
-
-  /// Produce a vp-intrinsic using the opcode and operands of the recipe,
-  /// processing EVL elements.
-  void execute(VPTransformState &State) override final;
-
-  /// Returns true if the recipe only uses the first lane of operand \p Op.
-  bool onlyFirstLaneUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    // EVL in that recipe is always the last operand, thus any use before means
-    // the VPValue should be vectorized.
-    return getEVL() == Op;
-  }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  /// Print the recipe.
-  void print(raw_ostream &O, const Twine &Indent,
-             VPSlotTracker &SlotTracker) const override final;
-#endif
-};
-
 /// VPWidenCastRecipe is a recipe to create vector cast instructions.
 class VPWidenCastRecipe : public VPRecipeWithIRFlags {
   /// Cast instruction opcode.
```

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 3 deletions
```diff
@@ -251,9 +251,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
                 VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
             return inferScalarType(R->getOperand(0));
           })
-          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
-                VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
-                VPWidenSelectRecipe>(
+          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
+                VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
               [this](const auto *R) { return inferScalarTypeForRecipe(R); })
           .Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
             return R->getResultType();
```

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 0 additions & 8 deletions
```diff
@@ -155,14 +155,6 @@ struct Recipe_match {
     if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
       return false;
 
-    if (!(std::is_same_v<VPWidenEVLRecipe, RecipeTys> || ...) &&
-        isa<VPWidenEVLRecipe>(R)) {
-      // Don't match VPWidenEVLRecipe if it is not explicitly part of RecipeTys.
-      // Otherwise we might match it unexpectedly when trying to match
-      // VPWidenRecipe, of which VPWidenEVLRecipe is a subclass of.
-      return false;
-    }
-
     assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
            "recipe with matched opcode the expected number of operands");
```

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 48 deletions
```diff
@@ -86,7 +86,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
   case VPWidenLoadSC:
   case VPWidenPHISC:
   case VPWidenSC:
-  case VPWidenEVLSC:
   case VPWidenSelectSC: {
     const Instruction *I =
         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -132,7 +131,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
   case VPWidenIntOrFpInductionSC:
   case VPWidenPHISC:
   case VPWidenSC:
-  case VPWidenEVLSC:
   case VPWidenSelectSC: {
     const Instruction *I =
         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -173,7 +171,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPWidenPHISC:
   case VPWidenPointerInductionSC:
   case VPWidenSC:
-  case VPWidenEVLSC:
   case VPWidenSelectSC: {
     const Instruction *I =
         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -1602,42 +1599,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
   }
 }
 
-void VPWidenEVLRecipe::execute(VPTransformState &State) {
-  unsigned Opcode = getOpcode();
-  // TODO: Support other opcodes
-  if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
-    llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
-
-  State.setDebugLocFrom(getDebugLoc());
-
-  assert(State.get(getOperand(0))->getType()->isVectorTy() &&
-         "VPWidenEVLRecipe should not be used for scalars");
-
-  VPValue *EVL = getEVL();
-  Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
-  IRBuilderBase &BuilderIR = State.Builder;
-  VectorBuilder Builder(BuilderIR);
-  Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
-
-  SmallVector<Value *, 4> Ops;
-  for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
-    VPValue *VPOp = getOperand(I);
-    Ops.push_back(State.get(VPOp));
-  }
-
-  Builder.setMask(Mask).setEVL(EVLArg);
-  Value *VPInst =
-      Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
-  // Currently vp-intrinsics only accept FMF flags.
-  // TODO: Enable other flags when support is added.
-  if (isa<FPMathOperator>(VPInst))
-    setFlags(cast<Instruction>(VPInst));
-
-  State.set(this, VPInst);
-  State.addMetadata(VPInst,
-                    dyn_cast_or_null<Instruction>(getUnderlyingValue()));
-}
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const {
@@ -1647,15 +1608,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
   printFlags(O);
   printOperands(O, SlotTracker);
 }
-
-void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
-                             VPSlotTracker &SlotTracker) const {
-  O << Indent << "WIDEN ";
-  printAsOperand(O, SlotTracker);
-  O << " = vp." << Instruction::getOpcodeName(getOpcode());
-  printFlags(O);
-  printOperands(O, SlotTracker);
-}
 #endif
 
 void VPWidenCastRecipe::execute(VPTransformState &State) {
```

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 0 additions & 30 deletions
```diff
@@ -1662,40 +1662,10 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
         VPValue *NewMask = GetNewMask(S->getMask());
         return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
       })
-      .Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
-        unsigned Opcode = W->getOpcode();
-        if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
-          return nullptr;
-        return new VPWidenEVLRecipe(*W, EVL);
-      })
       .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
        VPValue *NewMask = GetNewMask(Red->getCondOp());
        return new VPReductionEVLRecipe(*Red, EVL, NewMask);
      })
-      .Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>(
-          [&](auto *CR) -> VPRecipeBase * {
-            Intrinsic::ID VPID;
-            if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(CR)) {
-              VPID =
-                  VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID());
-            } else {
-              auto *CastR = cast<VPWidenCastRecipe>(CR);
-              VPID = VPIntrinsic::getForOpcode(CastR->getOpcode());
-            }
-
-            // Not all intrinsics have a corresponding VP intrinsic.
-            if (VPID == Intrinsic::not_intrinsic)
-              return nullptr;
-            assert(VPIntrinsic::getMaskParamPos(VPID) &&
-                   VPIntrinsic::getVectorLengthParamPos(VPID) &&
-                   "Expected VP intrinsic to have mask and EVL");
-
-            SmallVector<VPValue *> Ops(CR->operands());
-            Ops.push_back(&AllOneMask);
-            Ops.push_back(&EVL);
-            return new VPWidenIntrinsicRecipe(
-                VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc());
-          })
       .Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
         SmallVector<VPValue *> Ops(Sel->operands());
         Ops.push_back(&EVL);
```
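After this change, createEVLRecipe still converts memory recipes, reductions and selects to their EVL forms, while widened arithmetic, casts and widened intrinsic calls are left as ordinary recipes. A hedged IR-level sketch of the resulting mix (illustrative value names, modelled on the test updates below):

```llvm
; Still EVL-predicated: loads, stores, reductions and selects take the EVL
; operand directly.
%v = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr %p, <vscale x 8 x i1> splat (i1 true), i32 %evl)
; No longer converted: the cast and the multiply stay regular instructions.
%w = sext <vscale x 8 x i16> %v to <vscale x 8 x i32>
%m = mul <vscale x 8 x i32> %w, %w
%r = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> %m, <vscale x 8 x i1> splat (i1 true), i32 %evl)
```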

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 0 additions & 1 deletion
```diff
@@ -351,7 +351,6 @@ class VPDef {
     VPWidenStoreEVLSC,
     VPWidenStoreSC,
     VPWidenSC,
-    VPWidenEVLSC,
     VPWidenSelectSC,
     VPBlendSC,
     VPHistogramSC,
```

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 0 additions & 4 deletions
```diff
@@ -145,10 +145,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
           [&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
       .Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
           [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
-      .Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
-        return VerifyEVLUse(*W,
-                            Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
-      })
       .Case<VPScalarCastRecipe>(
           [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
       .Case<VPInstruction>([&](const VPInstruction *I) {
```
Lines changed: 59 additions & 0 deletions
```diff
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+; Check that EVL tail folded loops from the loop vectorizer are able to have the
+; vl of non-VP instructions reduced.
+define void @evl_tail_folded(ptr %p, ptr %q) {
+; CHECK-LABEL: evl_tail_folded:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a3, a3, 2
+; CHECK-NEXT:    addi a4, a3, 1023
+; CHECK-NEXT:    neg a5, a3
+; CHECK-NEXT:    and a4, a4, a5
+; CHECK-NEXT:    li a5, 1024
+; CHECK-NEXT:  .LBB0_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sub a6, a5, a2
+; CHECK-NEXT:    slli a7, a2, 3
+; CHECK-NEXT:    vsetvli a6, a6, e64, m2, ta, ma
+; CHECK-NEXT:    add t0, a0, a7
+; CHECK-NEXT:    vle64.v v8, (t0)
+; CHECK-NEXT:    sub a4, a4, a3
+; CHECK-NEXT:    add a7, a1, a7
+; CHECK-NEXT:    vadd.vi v8, v8, 1
+; CHECK-NEXT:    vse64.v v8, (a7)
+; CHECK-NEXT:    add a2, a2, a6
+; CHECK-NEXT:    bnez a4, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.vscale.i64()
+  %1 = shl i64 %0, 1
+  %n.rnd.up = add i64 %1, 1023
+  %n.mod.vf = urem i64 %n.rnd.up, %1
+  %n.vec = sub i64 %n.rnd.up, %n.mod.vf
+  %2 = tail call i64 @llvm.vscale.i64()
+  %3 = shl i64 %2, 1
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
+  %avl = sub i64 1024, %evl.based.iv
+  %4 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
+  %5 = getelementptr i64, ptr %p, i64 %evl.based.iv
+  %vp.op.load = tail call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %5, <vscale x 2 x i1> splat (i1 true), i32 %4)
+  %6 = add <vscale x 2 x i64> %vp.op.load, splat (i64 1)
+  %7 = getelementptr i64, ptr %q, i64 %evl.based.iv
+  tail call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %6, ptr %7, <vscale x 2 x i1> splat (i1 true), i32 %4)
+  %8 = zext i32 %4 to i64
+  %index.evl.next = add i64 %evl.based.iv, %8
+  %index.next = add i64 %index, %3
+  %9 = icmp eq i64 %index.next, %n.vec
+  br i1 %9, label %exit, label %vector.body
+
+exit:
+  ret void
+}
```

llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll

Lines changed: 3 additions & 3 deletions
```diff
@@ -143,8 +143,8 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; IF-EVL-OUTLOOP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]]
 ; IF-EVL-OUTLOOP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
 ; IF-EVL-OUTLOOP-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-OUTLOOP-NEXT:    [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16(<vscale x 4 x i16> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-OUTLOOP-NEXT:    [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; IF-EVL-OUTLOOP-NEXT:    [[TMP9:%.*]] = sext <vscale x 4 x i16> [[VP_OP_LOAD]] to <vscale x 4 x i32>
+; IF-EVL-OUTLOOP-NEXT:    [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP9]]
 ; IF-EVL-OUTLOOP-NEXT:    [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
 ; IF-EVL-OUTLOOP-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
 ; IF-EVL-OUTLOOP-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
@@ -200,7 +200,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; IF-EVL-INLOOP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
 ; IF-EVL-INLOOP-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT:    [[TMP14:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT:    [[TMP14:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
 ; IF-EVL-INLOOP-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
 ; IF-EVL-INLOOP-NEXT:    [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
 ; IF-EVL-INLOOP-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
```
llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll

Lines changed: 4 additions & 4 deletions
```diff
@@ -27,10 +27,10 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> [[VP_OP_LOAD]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    [[VP_OP:%.*]] = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> [[TMP7]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    [[VP_OP1:%.*]] = call <vscale x 1 x i16> @llvm.vp.lshr.nxv1i16(<vscale x 1 x i16> [[VP_OP]], <vscale x 1 x i16> trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>), <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> [[VP_OP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
+; CHECK-NEXT:    [[TMP7:%.*]] = zext <vscale x 1 x i8> [[VP_OP_LOAD]] to <vscale x 1 x i16>
+; CHECK-NEXT:    [[TMP12:%.*]] = mul <vscale x 1 x i16> zeroinitializer, [[TMP7]]
+; CHECK-NEXT:    [[VP_OP1:%.*]] = lshr <vscale x 1 x i16> [[TMP12]], trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>)
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc <vscale x 1 x i16> [[VP_OP1]] to <vscale x 1 x i8>
 ; CHECK-NEXT:    call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> [[TMP8]], <vscale x 1 x ptr> align 1 zeroinitializer, <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
 ; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
```

llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll

Lines changed: 7 additions & 7 deletions
```diff
@@ -44,15 +44,15 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
 ; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT:    [[TMP15:%.*]] = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT:    [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT:    [[VP_OP2:%.*]] = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT:    [[VP_OP3:%.*]] = call <vscale x 8 x i32> @llvm.vp.or.nxv8i32(<vscale x 8 x i32> [[VP_OP2]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
+; CHECK-NEXT:    [[TMP15:%.*]] = zext <vscale x 8 x i8> [[VP_OP_LOAD]] to <vscale x 8 x i32>
+; CHECK-NEXT:    [[VP_OP:%.*]] = mul <vscale x 8 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = ashr <vscale x 8 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT:    [[VP_OP3:%.*]] = or <vscale x 8 x i32> [[TMP23]], zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer, i32 [[TMP11]])
-; CHECK-NEXT:    [[TMP18:%.*]] = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> [[TMP17]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT:    call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP18]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
-; CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i32(<vscale x 8 x i32> [[VP_OP]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
+; CHECK-NEXT:    [[TMP24:%.*]] = trunc <vscale x 8 x i32> [[TMP17]] to <vscale x 8 x i8>
+; CHECK-NEXT:    call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP24]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT:    [[TMP19:%.*]] = trunc <vscale x 8 x i32> [[VP_OP]] to <vscale x 8 x i16>
 ; CHECK-NEXT:    call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> [[TMP19]], <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
 ; CHECK-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
 ; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
```
