[VP][RISCV] Add a vp.load.ff intrinsic for fault only first load. #128593
Conversation
Seems there's been some interest in supporting early-exit loops recently: https://discourse.llvm.org/t/rfc-supporting-more-early-exit-loops/84690

This patch was extracted from our downstream, where we've been using it in our vectorizer.

Still need to write up LangRef. Type legalization is also missing.
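For reference, a minimal sketch of what a call looks like (the signature follows the tests in this patch; the function and value names here are illustrative): the intrinsic returns the loaded vector together with the number of elements actually read.

declare { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr, <4 x i1>, i32)

define { <4 x i32>, i32 } @ff_load_example(ptr %p, <4 x i1> %m, i32 zeroext %evl) {
  ; Reads up to %evl elements. A fault on any element other than the first
  ; does not trap; instead the second result reports how many leading
  ; elements were actually loaded.
  %r = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %p, <4 x i1> %m, i32 %evl)
  ret { <4 x i32>, i32 } %r
}

The returned count can then drive the loop's induction update so the next iteration resumes exactly where the load stopped.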
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Patch is 92.96 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/128593.diff

13 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index aa0dfbe666cde..b00b939ab2afc 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1572,6 +1572,8 @@ class SelectionDAG {
SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
ISD::MemIndexType IndexType);
+ SDValue getLoadFFVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachineMemOperand *MMO);
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT,
MachineMemOperand *MMO);
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 20283ad8f2689..007055d88424b 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -3057,6 +3057,23 @@ class MaskedHistogramSDNode : public MaskedGatherScatterSDNode {
}
};
+class VPLoadFFSDNode : public MemSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPLoadFFSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MemSDNode(ISD::VP_LOAD_FF, Order, dl, VTs, MemVT, MMO) {}
+
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getMask() const { return getOperand(2); }
+ const SDValue &getVectorLength() const { return getOperand(3); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_LOAD_FF;
+ }
+};
+
class FPStateAccessSDNode : public MemSDNode {
public:
friend class SelectionDAG;
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 14ecae41ff08f..0c26c7bcfbad8 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1911,6 +1911,12 @@ def int_vp_load : DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
llvm_i32_ty],
[ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
+def int_vp_load_ff : DefaultAttrsIntrinsic<[ llvm_anyvector_ty, llvm_i32_ty ],
+ [ llvm_anyptr_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty],
+ [ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
+
def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
[ LLVMVectorOfAnyPointersToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 55f4719da7c8b..4a71097226f18 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -587,6 +587,12 @@ VP_PROPERTY_FUNCTIONAL_OPC(Load)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load)
END_REGISTER_VP(vp_load, VP_LOAD)
+BEGIN_REGISTER_VP_INTRINSIC(vp_load_ff, 1, 2)
+// val,chain = VP_LOAD_FF chain,base,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_LOAD_FF, -1, vp_load_ff, 2, 3)
+HELPER_MAP_VPID_TO_VPSD(vp_load_ff, VP_LOAD_FF)
+VP_PROPERTY_NO_FUNCTIONAL
+END_REGISTER_VP(vp_load_ff, VP_LOAD_FF)
// llvm.experimental.vp.strided.load(ptr,stride,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_load, 2, 3)
// chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9e61df7047d4a..ff6ef4d02c520 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -10139,6 +10139,34 @@ SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT,
return V;
}
+SDValue SelectionDAG::getLoadFFVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO) {
+ SDVTList VTs = getVTList(VT, EVL.getValueType(), MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_LOAD_FF, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadFFSDNode>(dl.getIROrder(),
+ VTs, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPLoadFFSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPLoadFFSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
EVT MemVT, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1c58a7f05446c..a287bdeb1eb90 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8462,6 +8462,35 @@ void SelectionDAGBuilder::visitVPLoad(
setValue(&VPIntrin, LD);
}
+void SelectionDAGBuilder::visitVPLoadFF(
+ const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
+ const SmallVectorImpl<SDValue> &OpValues) {
+ assert(OpValues.size() == 3);
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ bool AddToChain = true;
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1));
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(2));
+ setValue(&VPIntrin, DAG.getMergeValues({LD.getValue(0), Trunc}, DL));
+}
+
void SelectionDAGBuilder::visitVPGather(
const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues) {
@@ -8695,6 +8724,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
case ISD::VP_LOAD:
visitVPLoad(VPIntrin, ValueVTs[0], OpValues);
break;
+ case ISD::VP_LOAD_FF:
+ visitVPLoadFF(VPIntrin, ValueVTs[0], ValueVTs[1], OpValues);
+ break;
case ISD::VP_GATHER:
visitVPGather(VPIntrin, ValueVTs[0], OpValues);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8496f8ae78ce6..b30695876828d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -632,6 +632,8 @@ class SelectionDAGBuilder {
void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPLoadFF(const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
const SmallVectorImpl<SDValue> &OpValues);
void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 256bce1abe71f..7ddea32f57f02 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -448,6 +448,7 @@ VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) {
case Intrinsic::experimental_vp_strided_store:
return 1;
case Intrinsic::vp_load:
+ case Intrinsic::vp_load_ff:
case Intrinsic::vp_gather:
case Intrinsic::experimental_vp_strided_load:
return 0;
@@ -671,6 +672,10 @@ Function *VPIntrinsic::getOrInsertDeclarationForParams(
VPFunc = Intrinsic::getOrInsertDeclaration(
M, VPID, {ReturnType, Params[0]->getType()});
break;
+ case Intrinsic::vp_load_ff:
+ VPFunc = Intrinsic::getOrInsertDeclaration(
+ M, VPID, {ReturnType->getStructElementType(0), Params[0]->getType()});
+ break;
case Intrinsic::experimental_vp_strided_load:
VPFunc = Intrinsic::getOrInsertDeclaration(
M, VPID, {ReturnType, Params[0]->getType(), Params[1]->getType()});
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6076fe56416ad..3da92c0f43590 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -880,6 +880,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
@@ -1031,6 +1032,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -1101,6 +1103,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
@@ -1269,6 +1272,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
@@ -1357,6 +1361,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
@@ -7616,6 +7621,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::MLOAD:
case ISD::VP_LOAD:
return lowerMaskedLoad(Op, DAG);
+ case ISD::VP_LOAD_FF:
+ return lowerLoadFF(Op, DAG);
case ISD::MSTORE:
case ISD::VP_STORE:
return lowerMaskedStore(Op, DAG);
@@ -11965,6 +11972,57 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
return DAG.getMergeValues({Result, Chain}, DL);
}
+SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getResNo() == 0);
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
+ EVT MemVT = VPLoadFF->getMemoryVT();
+ MachineMemOperand *MMO = VPLoadFF->getMemOperand();
+ SDValue Chain = VPLoadFF->getChain();
+ SDValue BasePtr = VPLoadFF->getBasePtr();
+
+ SDValue Mask = VPLoadFF->getMask();
+ SDValue VL = VPLoadFF->getVectorLength();
+
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ if (!IsUnmasked) {
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ unsigned IntID =
+ IsUnmasked ? Intrinsic::riscv_vleff : Intrinsic::riscv_vleff_mask;
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
+ Ops.push_back(BasePtr);
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+ if (!IsUnmasked)
+ Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
+
+ SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
+
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
+ SDValue OutVL = Result.getValue(1);
+ Chain = Result.getValue(2);
+
+ if (VT.isFixedLengthVector())
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+ return DAG.getMergeValues({Result, OutVL, Chain}, DL);
+}
+
SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 26b888653c81d..8bba8c50ba862 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -991,6 +991,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLoadFF(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
new file mode 100644
index 0000000000000..9f982293256ac
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
@@ -0,0 +1,633 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+declare { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i8>, i32 } @vploadff_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x i8>, i32 } %load
+}
+
+define { <2 x i8>, i32 } @vploadff_v2i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x i8>, i32 } %load
+}
+
+declare { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i8>, i32 } @vploadff_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x i8>, i32 } %load
+}
+
+define { <4 x i8>, i32 } @vploadff_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x i8>, i32 } %load
+}
+
+declare { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i8>, i32 } @vploadff_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x i8>, i32 } %load
+}
+
+define { <8 x i8>, i32 } @vploadff_v8i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x i8>, i32 } %load
+}
+
+declare { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i16>, i32 } @vploadff_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x i16>, i32 } %load
+}
+
+define { <2 x i16>, i32 } @vploadff_v2i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x i16>, i32 } %load
+}
+
+declare { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i16>, i32 } @vploadff_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret...
[truncated]
llvm/include/llvm/IR/Intrinsics.td (Outdated)
@@ -1911,6 +1911,12 @@ def int_vp_load : DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
llvm_i32_ty],
[ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;

def int_vp_load_ff : DefaultAttrsIntrinsic<[ llvm_anyvector_ty, llvm_i32_ty ],
This may not match SVE because they just change the FFR predicate register. But I haven't come up with a representation that can represent two kinds of semantics.
Yes, I think in this RFC, they mentioned that having two variants—one for RVV and one for SVE—is acceptable.
But if so, it is still target-dependent. Anyway, I'd like to see this PR move forward.
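For comparison, a purely hypothetical sketch of the SVE-flavored shape discussed here (not part of this patch): the second result would be an FFR-style predicate of the lanes that loaded successfully, rather than the element count the RVV-flavored intrinsic returns.

; Hypothetical signature, for illustration only: the second result is a
; mask of successfully loaded lanes instead of an element count.
declare { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.vp.load.ff.mask.nxv16i8.p0(ptr, <vscale x 16 x i1>, i32)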
Oh, where is the non-vp version? Or does it exist? :-)
LGTM
Looking forward to this patch, as it helps relax the restrictions imposed by undefined behavior from out-of-bounds loads in early-exit loops (PR #120603).
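As a rough sketch of that use case (illustrative only, not taken from this patch or from PR #120603): a vectorized byte-scan loop can use vp.load.ff so that no lane past a faulting address is ever accessed, and the returned count bounds which lanes are valid to inspect.

declare { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr, <16 x i1>, i32)
declare i64 @llvm.umin.i64(i64, i64)

; Assumes %n > 0. The early-exit compare on the loaded lanes is elided.
define i64 @scan_bytes(ptr %base, i64 %n) {
entry:
  ; All-ones mask, built the same way as in the tests above.
  %m0 = insertelement <16 x i1> poison, i1 true, i32 0
  %mask = shufflevector <16 x i1> %m0, <16 x i1> poison, <16 x i32> zeroinitializer
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %p = getelementptr i8, ptr %base, i64 %i
  %rem = sub i64 %n, %i
  %clamped = call i64 @llvm.umin.i64(i64 %rem, i64 16)
  %evl = trunc i64 %clamped to i32
  ; Fault-only-first: lanes past a faulting address are not accessed;
  ; %vl reports how many leading lanes hold valid data.
  %ld = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr %p, <16 x i1> %mask, i32 %evl)
  %vl = extractvalue { <16 x i8>, i32 } %ld, 1
  ; ... inspect the first %vl lanes of the loaded vector and exit early on a match ...
  %vl.zext = zext i32 %vl to i64
  %i.next = add i64 %i, %vl.zext
  %done = icmp uge i64 %i.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret i64 %i.next
}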
llvm/include/llvm/IR/VPIntrinsics.def
@@ -587,6 +587,12 @@ VP_PROPERTY_FUNCTIONAL_OPC(Load)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load)
END_REGISTER_VP(vp_load, VP_LOAD)

BEGIN_REGISTER_VP_INTRINSIC(vp_load_ff, 1, 2)
// val,chain = VP_LOAD_FF chain,base,mask,evl
I was going to ask why this doesn't have an offset operand like the others so it could inherit from VPBaseLoadStoreSDNode, but then I realised we don't use the offset in RISCVISelLowering for regular vp.load/store. SelectionDAGBuilder doesn't set it; I think it's always undef. Not for this PR, but can we remove it?
I think we can probably remove Offset and Addressing mode until a target comes along that needs them.
LGTM with nits.
LGTM