[VPlan] Don't convert widen recipes to VP intrinsics in EVL transform #127180

Merged 5 commits on Feb 22, 2025
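
Summary (inferred from the diff below): the EVL tail-folding transform no longer rewrites widened arithmetic, cast, and intrinsic recipes into vector-predication (VP) intrinsics, and the VPWidenEVLRecipe class that carried the explicit vector length for them is removed. Only recipes that have to consume the EVL directly (such as loads, stores, reductions, and selects) keep their EVL variants. A minimal IR-level sketch of the effect on a widened add inside an EVL tail-folded loop, adapted from the updated tests (value names are illustrative):

; Before this patch: the widened add became a VP intrinsic with an all-true
; mask and the loop's explicit vector length %evl.
%sum = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)

; After this patch: the add stays a plain vector instruction; %evl is still
; enforced by the surrounding vp.load/vp.store of the tail-folded loop.
%sum = add <vscale x 4 x i32> %a, %b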
53 changes: 1 addition & 52 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -523,7 +523,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
@@ -710,7 +709,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
@@ -1113,8 +1111,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
}

static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
return R->getVPDefID() == VPRecipeBase::VPWidenSC;
}

static inline bool classof(const VPUser *U) {
@@ -1139,54 +1136,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
#endif
};

/// A recipe for widening operations with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenEVLRecipe : public VPWidenRecipe {
using VPRecipeWithIRFlags::transferFlags;

public:
template <typename IterT>
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
addOperand(&EVL);
}
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
transferFlags(W);
}

~VPWidenEVLRecipe() override = default;

VPWidenRecipe *clone() override final {
llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
return nullptr;
}

VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);

VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }

/// Produce a vp-intrinsic using the opcode and operands of the recipe,
/// processing EVL elements.
void execute(VPTransformState &State) override final;

/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// EVL in that recipe is always the last operand, thus any use before means
// the VPValue should be vectorized.
return getEVL() == Op;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override final;
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.
5 changes: 2 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -251,9 +251,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
return R->getResultType();
8 changes: 0 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -155,14 +155,6 @@ struct Recipe_match {
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
return false;

if (!(std::is_same_v<VPWidenEVLRecipe, RecipeTys> || ...) &&
isa<VPWidenEVLRecipe>(R)) {
// Don't match VPWidenEVLRecipe if it is not explicitly part of RecipeTys.
// Otherwise we might match it unexpectedly when trying to match
// VPWidenRecipe, of which VPWidenEVLRecipe is a subclass of.
return false;
}

assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
"recipe with matched opcode the expected number of operands");

48 changes: 0 additions & 48 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -86,7 +86,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -132,7 +131,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -173,7 +171,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPHISC:
case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -1602,42 +1599,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
}
}

void VPWidenEVLRecipe::execute(VPTransformState &State) {
unsigned Opcode = getOpcode();
// TODO: Support other opcodes
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");

State.setDebugLocFrom(getDebugLoc());

assert(State.get(getOperand(0))->getType()->isVectorTy() &&
"VPWidenEVLRecipe should not be used for scalars");

VPValue *EVL = getEVL();
Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
IRBuilderBase &BuilderIR = State.Builder;
VectorBuilder Builder(BuilderIR);
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());

SmallVector<Value *, 4> Ops;
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
VPValue *VPOp = getOperand(I);
Ops.push_back(State.get(VPOp));
}

Builder.setMask(Mask).setEVL(EVLArg);
Value *VPInst =
Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
// Currently vp-intrinsics only accept FMF flags.
// TODO: Enable other flags when support is added.
if (isa<FPMathOperator>(VPInst))
setFlags(cast<Instruction>(VPInst));

State.set(this, VPInst);
State.addMetadata(VPInst,
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1647,15 +1608,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
printFlags(O);
printOperands(O, SlotTracker);
}

void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
O << " = vp." << Instruction::getOpcodeName(getOpcode());
printFlags(O);
printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
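
For reference, the VPWidenEVLRecipe::execute removed above only handled unary and binary opcodes, emitted the VP intrinsic through VectorBuilder with an all-true mask plus the EVL operand, and could transfer only FMF flags; its print method prefixed the opcode with "vp.". With the recipe gone, the widened operation is printed by VPWidenRecipe::print instead, so a VPlan dump changes roughly like this (recipe operands and names are illustrative):

WIDEN ir<%add> = vp.add ir<%a>, ir<%b>, vp<%evl>   (old VPWidenEVLRecipe form)
WIDEN ir<%add> = add ir<%a>, ir<%b>                (VPWidenRecipe form kept by this patch)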
30 changes: 0 additions & 30 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1662,40 +1662,10 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
VPValue *NewMask = GetNewMask(S->getMask());
return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
})
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
unsigned Opcode = W->getOpcode();
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
})
.Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>(
[&](auto *CR) -> VPRecipeBase * {
Intrinsic::ID VPID;
if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(CR)) {
VPID =
VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID());
} else {
auto *CastR = cast<VPWidenCastRecipe>(CR);
VPID = VPIntrinsic::getForOpcode(CastR->getOpcode());
}

// Not all intrinsics have a corresponding VP intrinsic.
if (VPID == Intrinsic::not_intrinsic)
return nullptr;
assert(VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
"Expected VP intrinsic to have mask and EVL");

SmallVector<VPValue *> Ops(CR->operands());
Ops.push_back(&AllOneMask);
Ops.push_back(&EVL);
return new VPWidenIntrinsicRecipe(
VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc());
})
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
SmallVector<VPValue *> Ops(Sel->operands());
Ops.push_back(&EVL);
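
After this change, createEVLRecipe only produces EVL-aware replacements for the recipe kinds that remain in the switch (memory accesses, reductions, and selects); widened arithmetic, casts, and call intrinsics are left untouched. Correctness does not rely on predicating those ops, since lanes beyond the EVL are never stored. A sketch of the resulting loop body, adapted from the new RISC-V test added below (pointer and value names are illustrative):

%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
%load = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %p, <vscale x 2 x i1> splat (i1 true), i32 %evl)
%add = add <vscale x 2 x i64> %load, splat (i64 1)   ; plain IR op, no vp.add
call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %add, ptr %q, <vscale x 2 x i1> splat (i1 true), i32 %evl)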
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -351,7 +351,6 @@ class VPDef {
VPWidenStoreEVLSC,
VPWidenStoreSC,
VPWidenSC,
VPWidenEVLSC,
VPWidenSelectSC,
VPBlendSC,
VPHistogramSC,
4 changes: 0 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -145,10 +145,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
return VerifyEVLUse(*W,
Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
.Case<VPScalarCastRecipe>(
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
.Case<VPInstruction>([&](const VPInstruction *I) {
59 changes: 59 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vl-opt-evl-tail-folding.ll
@@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s

; Check that EVL tail folded loops from the loop vectorizer are able to have the
; vl of non-VP instructions reduced.
define void @evl_tail_folded(ptr %p, ptr %q) {
; CHECK-LABEL: evl_tail_folded:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: srli a3, a3, 2
; CHECK-NEXT: addi a4, a3, 1023
; CHECK-NEXT: neg a5, a3
; CHECK-NEXT: and a4, a4, a5
; CHECK-NEXT: li a5, 1024
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub a6, a5, a2
; CHECK-NEXT: slli a7, a2, 3
; CHECK-NEXT: vsetvli a6, a6, e64, m2, ta, ma
; CHECK-NEXT: add t0, a0, a7
; CHECK-NEXT: vle64.v v8, (t0)
; CHECK-NEXT: sub a4, a4, a3
; CHECK-NEXT: add a7, a1, a7
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vse64.v v8, (a7)
; CHECK-NEXT: add a2, a2, a6
; CHECK-NEXT: bnez a4, .LBB0_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl i64 %0, 1
%n.rnd.up = add i64 %1, 1023
%n.mod.vf = urem i64 %n.rnd.up, %1
%n.vec = sub i64 %n.rnd.up, %n.mod.vf
%2 = tail call i64 @llvm.vscale.i64()
%3 = shl i64 %2, 1
br label %vector.body

vector.body:
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
%avl = sub i64 1024, %evl.based.iv
%4 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
%5 = getelementptr i64, ptr %p, i64 %evl.based.iv
%vp.op.load = tail call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %5, <vscale x 2 x i1> splat (i1 true), i32 %4)
%6 = add <vscale x 2 x i64> %vp.op.load, splat (i64 1)
%7 = getelementptr i64, ptr %q, i64 %evl.based.iv
tail call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %6, ptr %7, <vscale x 2 x i1> splat (i1 true), i32 %4)
%8 = zext i32 %4 to i64
%index.evl.next = add i64 %evl.based.iv, %8
%index.next = add i64 %index, %3
%9 = icmp eq i64 %index.next, %n.vec
br i1 %9, label %exit, label %vector.body

exit:
ret void
}
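
The CHECK lines above appear to be the motivating case for this patch: even though the add is emitted as plain IR rather than a vp.add, vadd.vi executes under the single vsetvli derived from @llvm.experimental.get.vector.length, i.e. the backend (presumably the RISC-V VL optimizer) already reduces the vl of non-VP instructions in EVL tail-folded loops.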
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -143,8 +143,8 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]]
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16(<vscale x 4 x i16> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[VP_OP_LOAD]] to <vscale x 4 x i32>
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP9]]
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
@@ -200,7 +200,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
@@ -27,10 +27,10 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> [[VP_OP_LOAD]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> [[TMP7]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[VP_OP1:%.*]] = call <vscale x 1 x i16> @llvm.vp.lshr.nxv1i16(<vscale x 1 x i16> [[VP_OP]], <vscale x 1 x i16> trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>), <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> [[VP_OP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP7:%.*]] = zext <vscale x 1 x i8> [[VP_OP_LOAD]] to <vscale x 1 x i16>
; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 1 x i16> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[VP_OP1:%.*]] = lshr <vscale x 1 x i16> [[TMP12]], trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <vscale x 1 x i16> [[VP_OP1]] to <vscale x 1 x i8>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> [[TMP8]], <vscale x 1 x ptr> align 1 zeroinitializer, <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
@@ -44,15 +44,15 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP2:%.*]] = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP3:%.*]] = call <vscale x 8 x i32> @llvm.vp.or.nxv8i32(<vscale x 8 x i32> [[VP_OP2]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[VP_OP_LOAD]] to <vscale x 8 x i32>
; CHECK-NEXT: [[VP_OP:%.*]] = mul <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = ashr <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[VP_OP3:%.*]] = or <vscale x 8 x i32> [[TMP23]], zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer, i32 [[TMP11]])
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> [[TMP17]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP18]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i32(<vscale x 8 x i32> [[VP_OP]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP24:%.*]] = trunc <vscale x 8 x i32> [[TMP17]] to <vscale x 8 x i8>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP24]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[TMP19:%.*]] = trunc <vscale x 8 x i32> [[VP_OP]] to <vscale x 8 x i16>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> [[TMP19]], <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]