diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 2da154c300344..57702aa50774b 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1710,20 +1710,6 @@ let TargetPrefix = "riscv" in {
     defm vsuxseg # nf : RISCVISegStore<nf>;
   }
 
-  // Strided loads/stores for fixed vectors.
-  def int_riscv_masked_strided_load
-      : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                              [LLVMMatchType<0>, llvm_anyptr_ty,
-                               llvm_anyint_ty,
-                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-                              [NoCapture<ArgIndex<1>>, IntrReadMem]>;
-  def int_riscv_masked_strided_store
-      : DefaultAttrsIntrinsic<[],
-                              [llvm_anyvector_ty, llvm_anyptr_ty,
-                               llvm_anyint_ty,
-                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-                              [NoCapture<ArgIndex<1>>, IntrWriteMem]>;
-
   // Segment loads/stores for fixed vectors.
   foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
     def int_riscv_seg # nf # _load
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index d9971791a2cfa..881be28bfe79e 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -515,17 +515,23 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
   Builder.SetInsertPoint(II);
 
+  Value *EVL = Builder.CreateElementCount(
+      IntegerType::get(Ctx, 32), cast<VectorType>(DataType)->getElementCount());
+
   CallInst *Call;
-  if (II->getIntrinsicID() == Intrinsic::masked_gather)
+  if (II->getIntrinsicID() == Intrinsic::masked_gather) {
     Call = Builder.CreateIntrinsic(
-        Intrinsic::riscv_masked_strided_load,
+        Intrinsic::experimental_vp_strided_load,
         {DataType, BasePtr->getType(), Stride->getType()},
-        {II->getArgOperand(3), BasePtr, Stride, II->getArgOperand(2)});
-  else
+        {BasePtr, Stride, II->getArgOperand(2), EVL});
+    Call = Builder.CreateIntrinsic(
+        Intrinsic::vp_select, {DataType},
+        {II->getOperand(2), Call, II->getArgOperand(3), EVL});
+  } else
     Call = Builder.CreateIntrinsic(
-        Intrinsic::riscv_masked_strided_store,
+        Intrinsic::experimental_vp_strided_store,
         {DataType, BasePtr->getType(), Stride->getType()},
-        {II->getArgOperand(0), BasePtr, Stride, II->getArgOperand(3)});
+        {II->getArgOperand(0), BasePtr, Stride, II->getArgOperand(3), EVL});
 
   Call->takeName(II);
   II->replaceAllUsesWith(Call);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index dd7b0b4ed5ef7..d40d4997d7614 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1622,12 +1622,6 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                  MachineMemOperand::MOVolatile;
     return true;
-  case Intrinsic::riscv_masked_strided_load:
-    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
-                               /*IsUnitStrided*/ false);
-  case Intrinsic::riscv_masked_strided_store:
-    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
-                               /*IsUnitStrided*/ false);
   case Intrinsic::riscv_seg2_load:
   case Intrinsic::riscv_seg3_load:
   case Intrinsic::riscv_seg4_load:
@@ -9414,81 +9408,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   switch (IntNo) {
   default:
     break;
-  case Intrinsic::riscv_masked_strided_load: {
-    SDLoc DL(Op);
-    MVT XLenVT = Subtarget.getXLenVT();
-
-    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
-    // the selection of the masked intrinsics doesn't do this for us.
- SDValue Mask = Op.getOperand(5); - bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); - - MVT VT = Op->getSimpleValueType(0); - MVT ContainerVT = VT; - if (VT.isFixedLengthVector()) - ContainerVT = getContainerForFixedLengthVector(VT); - - SDValue PassThru = Op.getOperand(2); - if (!IsUnmasked) { - MVT MaskVT = getMaskTypeFor(ContainerVT); - if (VT.isFixedLengthVector()) { - Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); - PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); - } - } - - auto *Load = cast(Op); - SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; - SDValue Ptr = Op.getOperand(3); - SDValue Stride = Op.getOperand(4); - SDValue Result, Chain; - - // TODO: We restrict this to unmasked loads currently in consideration of - // the complexity of handling all falses masks. - MVT ScalarVT = ContainerVT.getVectorElementType(); - if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) { - SDValue ScalarLoad = - DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(), Ptr, - ScalarVT, Load->getMemOperand()); - Chain = ScalarLoad.getValue(1); - Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG, - Subtarget); - } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) { - SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr, - Load->getMemOperand()); - Chain = ScalarLoad.getValue(1); - Result = DAG.getSplat(ContainerVT, DL, ScalarLoad); - } else { - SDValue IntID = DAG.getTargetConstant( - IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, - XLenVT); - - SmallVector Ops{Load->getChain(), IntID}; - if (IsUnmasked) - Ops.push_back(DAG.getUNDEF(ContainerVT)); - else - Ops.push_back(PassThru); - Ops.push_back(Ptr); - Ops.push_back(Stride); - if (!IsUnmasked) - Ops.push_back(Mask); - Ops.push_back(VL); - if (!IsUnmasked) { - SDValue Policy = - DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); - Ops.push_back(Policy); - } - - SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); - Result = - DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, - Load->getMemoryVT(), Load->getMemOperand()); - Chain = Result.getValue(1); - } - if (VT.isFixedLengthVector()) - Result = convertFromScalableVector(VT, Result, DAG, Subtarget); - return DAG.getMergeValues({Result, Chain}, DL); - } case Intrinsic::riscv_seg2_load: case Intrinsic::riscv_seg3_load: case Intrinsic::riscv_seg4_load: @@ -9568,47 +9487,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, switch (IntNo) { default: break; - case Intrinsic::riscv_masked_strided_store: { - SDLoc DL(Op); - MVT XLenVT = Subtarget.getXLenVT(); - - // If the mask is known to be all ones, optimize to an unmasked intrinsic; - // the selection of the masked intrinsics doesn't do this for us. - SDValue Mask = Op.getOperand(5); - bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); - - SDValue Val = Op.getOperand(2); - MVT VT = Val.getSimpleValueType(); - MVT ContainerVT = VT; - if (VT.isFixedLengthVector()) { - ContainerVT = getContainerForFixedLengthVector(VT); - Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); - } - if (!IsUnmasked) { - MVT MaskVT = getMaskTypeFor(ContainerVT); - if (VT.isFixedLengthVector()) - Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); - } - - SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; - - SDValue IntID = DAG.getTargetConstant( - IsUnmasked ? 
Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, - XLenVT); - - auto *Store = cast(Op); - SmallVector Ops{Store->getChain(), IntID}; - Ops.push_back(Val); - Ops.push_back(Op.getOperand(3)); // Ptr - Ops.push_back(Op.getOperand(4)); // Stride - if (!IsUnmasked) - Ops.push_back(Mask); - Ops.push_back(VL); - - return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), - Ops, Store->getMemoryVT(), - Store->getMemOperand()); - } case Intrinsic::riscv_seg2_store: case Intrinsic::riscv_seg3_store: case Intrinsic::riscv_seg4_store: @@ -17551,43 +17429,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // By default we do not combine any intrinsic. default: return SDValue(); - case Intrinsic::riscv_masked_strided_load: { - MVT VT = N->getSimpleValueType(0); - auto *Load = cast(N); - SDValue PassThru = N->getOperand(2); - SDValue Base = N->getOperand(3); - SDValue Stride = N->getOperand(4); - SDValue Mask = N->getOperand(5); - - // If the stride is equal to the element size in bytes, we can use - // a masked.load. - const unsigned ElementSize = VT.getScalarStoreSize(); - if (auto *StrideC = dyn_cast(Stride); - StrideC && StrideC->getZExtValue() == ElementSize) - return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base, - DAG.getUNDEF(XLenVT), Mask, PassThru, - Load->getMemoryVT(), Load->getMemOperand(), - ISD::UNINDEXED, ISD::NON_EXTLOAD); - return SDValue(); - } - case Intrinsic::riscv_masked_strided_store: { - auto *Store = cast(N); - SDValue Value = N->getOperand(2); - SDValue Base = N->getOperand(3); - SDValue Stride = N->getOperand(4); - SDValue Mask = N->getOperand(5); - - // If the stride is equal to the element size in bytes, we can use - // a masked.store. - const unsigned ElementSize = Value.getValueType().getScalarStoreSize(); - if (auto *StrideC = dyn_cast(Stride); - StrideC && StrideC->getZExtValue() == ElementSize) - return DAG.getMaskedStore(Store->getChain(), DL, Value, Base, - DAG.getUNDEF(XLenVT), Mask, - Value.getValueType(), Store->getMemOperand(), - ISD::UNINDEXED, false); - return SDValue(); - } case Intrinsic::riscv_vcpop: case Intrinsic::riscv_vcpop_mask: case Intrinsic::riscv_vfirst: diff --git a/llvm/test/CodeGen/RISCV/pr89833.ll b/llvm/test/CodeGen/RISCV/pr89833.ll deleted file mode 100644 index 54a985040e758..0000000000000 --- a/llvm/test/CodeGen/RISCV/pr89833.ll +++ /dev/null @@ -1,16 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s - -declare void @llvm.riscv.masked.strided.store.nxv16i8.p0.i64(, ptr, i64, ) - -define void @test( %value, %mask) { -; CHECK-LABEL: test: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v8, 0 -; CHECK-NEXT: vse8.v v12, (zero), v0.t -; CHECK-NEXT: ret - %trunc = trunc %value to - call void @llvm.riscv.masked.strided.store.nxv16i8.p0.i64( %trunc, ptr null, i64 1, %mask) - ret void -} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll index ab5885a604443..d723c2f6df1af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll @@ -16,7 +16,8 @@ define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 
[[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -58,7 +59,8 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> [[MASKEDOFF:%.*]], ptr [[TMP0]], i64 5, <32 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]], i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -100,7 +102,8 @@ define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapt ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 155, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 -5, <32 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 -5, <32 x i1> , i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -142,7 +145,8 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 0, <32 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> 
@llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 0, <32 x i1> , i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -190,9 +194,10 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] -; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> ) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> , i32 32) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -232,9 +237,10 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> [[MASKEDOFF:%.*]], ptr [[TMP0]], i64 5, <32 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]], i32 32) ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] -; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> ) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> , i32 32) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -276,7 +282,8 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> 
@llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -321,7 +328,8 @@ define void @gather_unknown_pow2(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 [[TMP0]], <8 x i1> ) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 [[TMP0]], <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -418,9 +426,10 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] -; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> ) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> , i32 8) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -468,8 +477,10 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 8, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[B]], i64 [[VEC_IND_SCALAR1]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> ) -; CHECK-NEXT: 
[[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 16, <8 x i1> ) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 4 ; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr [[I2]], i64 8 @@ -537,29 +548,37 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[VEC_IND_SCALAR11:%.*]] = phi i64 [ 12, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR13:%.*]] = phi i64 [ 3, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 64, <8 x i1> ) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 16, <8 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 64, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP2]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I3:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER52]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP1]], i64 16, <8 x i1> ) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP2]], i64 64, <8 x i1> ) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP3]], i64 16, <8 x i1> ) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP2]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP4]], i64 64, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP5]], <8 x i32> undef, i32 8) +; 
CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]] +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP6]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP7]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER54]], [[WIDE_MASKED_GATHER53]] -; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP3]], i64 16, <8 x i1> ) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP4]], i64 64, <8 x i1> ) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP5]], i64 16, <8 x i1> ) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP6]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]] +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP8]], i64 64, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP9]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]] +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP10]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP11]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I13:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER56]], [[WIDE_MASKED_GATHER55]] -; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP5]], i64 16, <8 x i1> ) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP6]], i64 64, <8 x i1> ) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP7]], i64 16, <8 x i1> ) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP10]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]] +; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP12]], i64 64, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP13]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]] +; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP14]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP15]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER58]], [[WIDE_MASKED_GATHER57]] -; 
CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP7]], i64 16, <8 x i1> ) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP14]], i64 16, <8 x i1> , i32 8) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 128 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR2]] = add i64 [[VEC_IND_SCALAR1]], 32 @@ -636,8 +655,10 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; V-NEXT: [[I3_SCALAR1:%.*]] = phi i64 [ 10, [[BB]] ], [ [[I16_SCALAR2:%.*]], [[BB2]] ] ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG1:%.*]], i64 [[I3_SCALAR]] ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[I3_SCALAR1]] -; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.riscv.masked.strided.load.v2p0.p0.i64(<2 x ptr> undef, ptr [[TMP0]], i64 40, <2 x i1> ) -; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.riscv.masked.strided.load.v2p0.p0.i64(<2 x ptr> undef, ptr [[TMP1]], i64 40, <2 x i1> ) +; V-NEXT: [[TMP2:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP0]], i64 40, <2 x i1> , i32 2) +; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> , <2 x ptr> [[TMP2]], <2 x ptr> undef, i32 2) +; V-NEXT: [[TMP3:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP1]], i64 40, <2 x i1> , i32 2) +; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> , <2 x ptr> [[TMP3]], <2 x ptr> undef, i32 2) ; V-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]] ; V-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8 ; V-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2 @@ -717,8 +738,8 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; V-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8 ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i64 [[I3_SCALAR]] ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG]], i64 [[I3_SCALAR1]] -; V-NEXT: call void @llvm.riscv.masked.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> ) -; V-NEXT: call void @llvm.riscv.masked.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> ) +; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> , i32 2) +; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> , i32 2) ; V-NEXT: [[I15]] = add nuw i64 [[I]], 4 ; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20 ; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20 @@ -801,7 +822,8 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: [[I17_SCALAR:%.*]] = phi i64 [ [[START]], [[BB9]] ], [ [[I28_SCALAR:%.*]], [[BB15]] ] ; CHECK-NEXT: [[I18:%.*]] = add i64 [[I16]], [[I4]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I17_SCALAR]] -; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) +; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I22:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I18]] ; CHECK-NEXT: 
[[I24:%.*]] = load <32 x i8>, ptr [[I22]], align 1 ; CHECK-NEXT: [[I25:%.*]] = add <32 x i8> [[I24]], [[I21]] @@ -909,7 +931,8 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr ; CHECK-NEXT: [[I5:%.*]] = phi i64 [ [[I13:%.*]], [[BB4]] ], [ 0, [[BB2]] ] ; CHECK-NEXT: [[I6_SCALAR:%.*]] = phi i64 [ 0, [[BB2]] ], [ [[I14_SCALAR:%.*]], [[BB4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I6_SCALAR]] -; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.riscv.masked.strided.load.v16i8.p0.i64(<16 x i8> undef, ptr [[TMP0]], i64 5, <16 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr [[TMP0]], i64 5, <16 x i1> , i32 16) +; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.vp.select.v16i8(<16 x i1> , <16 x i8> [[TMP1]], <16 x i8> undef, i32 16) ; CHECK-NEXT: [[I10:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I5]] ; CHECK-NEXT: [[I11:%.*]] = load <16 x i8>, ptr [[I10]], align 1 ; CHECK-NEXT: [[I12:%.*]] = add <16 x i8> [[I11]], [[I9]] @@ -951,8 +974,9 @@ bb16: ; preds = %bb4, %bb define <8 x i8> @broadcast_ptr_base(ptr %a) { ; CHECK-LABEL: @broadcast_ptr_base( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.riscv.masked.strided.load.v8i8.p0.i64(<8 x i8> poison, ptr [[A:%.*]], i64 64, <8 x i1> ) -; CHECK-NEXT: ret <8 x i8> [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr [[A:%.*]], i64 64, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vp.select.v8i8(<8 x i1> , <8 x i8> [[TMP0]], <8 x i8> poison, i32 8) +; CHECK-NEXT: ret <8 x i8> [[TMP1]] ; entry: %0 = insertelement <8 x ptr> poison, ptr %a, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll deleted file mode 100644 index f8a5b0f9d03a6..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll +++ /dev/null @@ -1,133 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v | FileCheck %s - -declare <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8>, ptr, i64, <32 x i1>) -declare <2 x i64> @llvm.riscv.masked.strided.load.v2i64.p0.i64(<2 x i64>, ptr, i64, <2 x i1>) - -declare void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8>, ptr, i64, <32 x i1>) -declare void @llvm.riscv.masked.strided.store.v2i64.p0.i64(<2 x i64>, ptr, i64, <2 x i1>) - -define <32 x i8> @strided_load_i8(ptr %p, i64 %stride, <32 x i1> %m) { -; CHECK-LABEL: strided_load_i8: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t -; CHECK-NEXT: ret - %res = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr %p, i64 %stride, <32 x i1> %m) - ret <32 x i8> %res -} - -define <2 x i64> @strided_load_i64(ptr %p, i64 %stride, <2 x i1> %m) { -; CHECK-LABEL: strided_load_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t -; CHECK-NEXT: ret - %res = call <2 x i64> @llvm.riscv.masked.strided.load.v2i64.p0.i64(<2 x i64> undef, ptr %p, i64 %stride, <2 x i1> %m) - ret <2 x i64> %res -} - -define <32 x i8> @strided_load_i8_splat(ptr %p, <32 x i1> %m) { -; CHECK-LABEL: strided_load_i8_splat: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlse8.v 
v8, (a0), zero, v0.t -; CHECK-NEXT: ret - %res = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr %p, i64 0, <32 x i1> %m) - ret <32 x i8> %res -} - -define <32 x i8> @strided_load_i8_reverse(ptr %p, <32 x i1> %m) { -; CHECK-LABEL: strided_load_i8_reverse: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: li a2, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t -; CHECK-NEXT: ret - %res = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr %p, i64 -1, <32 x i1> %m) - ret <32 x i8> %res -} - -define <32 x i8> @strided_load_i8_nostride(ptr %p, <32 x i1> %m) { -; CHECK-LABEL: strided_load_i8_nostride: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0), v0.t -; CHECK-NEXT: ret - %res = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr %p, i64 1, <32 x i1> %m) - ret <32 x i8> %res -} - - -define void @strided_store_i8(ptr %p, <32 x i8> %v, i64 %stride, <32 x i1> %m) { -; CHECK-LABEL: strided_store_i8: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t -; CHECK-NEXT: ret - call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 %stride, <32 x i1> %m) - ret void -} - -define void @strided_store_i8_zero(ptr %p, <32 x i8> %v, <32 x i1> %m) { -; CHECK-LABEL: strided_store_i8_zero: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vsse8.v v8, (a0), zero, v0.t -; CHECK-NEXT: ret - call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 0, <32 x i1> %m) - ret void -} - -define void @strided_store_i8_nostride(ptr %p, <32 x i8> %v, <32 x i1> %m) { -; CHECK-LABEL: strided_store_i8_nostride: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vse8.v v8, (a0), v0.t -; CHECK-NEXT: ret - call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 1, <32 x i1> %m) - ret void -} - -define void @strided_store_i8_reverse(ptr %p, <32 x i8> %v, <32 x i1> %m) { -; CHECK-LABEL: strided_store_i8_reverse: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: li a2, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vsse8.v v8, (a0), a2, v0.t -; CHECK-NEXT: ret - call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 -1, <32 x i1> %m) - ret void -} - -declare void @llvm.riscv.masked.strided.store.nxv1i64.p0.i64(, ptr, i64, ) - -declare @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(, ptr, i64, ) - -define @strided_load_vscale_i64(ptr %p, i64 %stride, %m) { -; CHECK-LABEL: strided_load_vscale_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t -; CHECK-NEXT: ret - %res = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( undef, ptr %p, i64 %stride, %m) - ret %res -} - -define void @strided_store_vscale_i64(ptr %p, %v, i64 %stride, %m) { -; CHECK-LABEL: strided_store_vscale_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-NEXT: ret - call void @llvm.riscv.masked.strided.store.nxv1i64.p0.i64( %v, ptr %p, i64 %stride, %m) - ret void -} diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll index 
70412de1d0e91..381d1183995be 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll @@ -16,12 +16,14 @@ define @gather(ptr %a, i32 %len) { ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACCUM:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3 -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( undef, ptr [[TMP1]], i64 16, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 16, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP2]]) +; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP3]], undef, i32 [[TMP2]]) ; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[GATHER]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret [[ACCUM_NEXT]] ; @@ -59,7 +61,9 @@ define @gather_disjoint_or(ptr %a, i64 %len) { ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACCUM:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( poison, ptr [[TMP0]], i64 16, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP1]]) +; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP2]], poison, i32 [[TMP1]]) ; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[GATHER]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[VSCALE]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 2 @@ -111,11 +115,12 @@ define void @scatter(ptr %a, i32 %len) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3 -; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.nxv1i64.p0.i64( zeroinitializer, ptr [[TMP1]], i64 16, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, 
zeroinitializer)) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64( zeroinitializer, ptr [[TMP1]], i64 16, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP2]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; @@ -144,7 +149,9 @@ for.cond.cleanup: ; preds = %vector.body define @gather_loopless(ptr %p, i64 %stride) { ; CHECK-LABEL: @gather_loopless( ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4 -; CHECK-NEXT: [[X:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( poison, ptr [[P:%.*]], i64 [[TMP1]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP1]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP2]]) +; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP3]], poison, i32 [[TMP2]]) ; CHECK-NEXT: ret [[X]] ; %step = call @llvm.experimental.stepvector.nxv1i64() @@ -164,7 +171,9 @@ define @gather_loopless(ptr %p, i64 %stride) { define @straightline_offset_add(ptr %p, i64 %offset) { ; CHECK-LABEL: @straightline_offset_add( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET:%.*]] -; CHECK-NEXT: [[X:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( poison, ptr [[TMP1]], i64 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP2]]) +; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP3]], poison, i32 [[TMP2]]) ; CHECK-NEXT: ret [[X]] ; %step = call @llvm.experimental.stepvector.nxv1i64() @@ -184,7 +193,9 @@ define @straightline_offset_add(ptr %p, i64 %offset) { define @straightline_offset_disjoint_or(ptr %p, i64 %offset) { ; CHECK-LABEL: @straightline_offset_disjoint_or( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[X:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( poison, ptr [[TMP1]], i64 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP2]]) +; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP3]], poison, i32 [[TMP2]]) ; 
CHECK-NEXT: ret [[X]] ; %step = call @llvm.experimental.stepvector.nxv1i64() @@ -202,7 +213,9 @@ define @straightline_offset_disjoint_or(ptr %p, i64 %offset) define @straightline_offset_shl(ptr %p) { ; CHECK-LABEL: @straightline_offset_shl( -; CHECK-NEXT: [[X:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( poison, ptr [[P:%.*]], i64 32, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 32, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP1]]) +; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP2]], poison, i32 [[TMP1]]) ; CHECK-NEXT: ret [[X]] ; %step = call @llvm.experimental.stepvector.nxv1i64() @@ -245,7 +258,9 @@ define @straightline_offset_shl_nonc(ptr %p, i64 %shift) { ; CHECK-LABEL: @straightline_offset_shl_nonc( ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 1, [[SHIFT:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 -; CHECK-NEXT: [[X:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( poison, ptr [[P:%.*]], i64 [[TMP2]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP2]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP3]]) +; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP4]], poison, i32 [[TMP3]]) ; CHECK-NEXT: ret [[X]] ; %step = call @llvm.experimental.stepvector.nxv1i64() @@ -265,7 +280,8 @@ define @straightline_offset_shl_nonc(ptr %p, i64 %shift) { define void @scatter_loopless( %x, ptr %p, i64 %stride) { ; CHECK-LABEL: @scatter_loopless( ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4 -; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.nxv1i64.p0.i64( [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64( [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP2]]) ; CHECK-NEXT: ret void ; %step = call @llvm.experimental.stepvector.nxv1i64() @@ -301,8 +317,10 @@ define void @constant_stride( %x, ptr %p, i64 %stride) { define @vector_base_scalar_offset(ptr %p, i64 %offset) { ; CHECK-LABEL: @vector_base_scalar_offset( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]] -; CHECK-NEXT: [[X:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( poison, ptr [[TMP1]], i64 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[PTRS2OFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRS2OFFSET]], i64 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP1]]) +; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( 
insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP2]], poison, i32 [[TMP1]]) ; CHECK-NEXT: ret [[X]] ; %step = call @llvm.experimental.stepvector.nxv1i64() @@ -319,8 +337,10 @@ define @vector_base_scalar_offset(ptr %p, i64 %offset) { define @splat_base_scalar_offset(ptr %p, i64 %offset) { ; CHECK-LABEL: @splat_base_scalar_offset( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]] -; CHECK-NEXT: [[X:%.*]] = call @llvm.riscv.masked.strided.load.nxv1i64.p0.i64( poison, ptr [[TMP1]], i64 0, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[PTRSOFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRSOFFSET]], i64 0, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP1]]) +; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[TMP2]], poison, i32 [[TMP1]]) ; CHECK-NEXT: ret [[X]] ; %head = insertelement poison, ptr %p, i32 0
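
Note (illustrative, not part of the patch): with the riscv.masked.strided.* intrinsics removed, RISCVGatherScatterLowering now emits the target-independent VP strided load/store intrinsics, using the vector length as the EVL and a trailing vp.select to reapply the gather's mask and passthru. The sketch below shows the shape of the IR the pass would produce for a hypothetical fixed-length <8 x i32> gather/scatter with a constant 16-byte stride; the function names, types, and stride value are assumptions for illustration only.

; Sketch, assuming an <8 x i32> masked.gather whose addresses form a
; constant 16-byte stride from %base: the pass emits a VP strided load with
; EVL equal to the fixed vector length (8), then a vp.select carrying the
; original mask and passthru.
define <8 x i32> @strided_gather_sketch(ptr %base, <8 x i1> %mask, <8 x i32> %passthru) {
  %load = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr %base, i64 16, <8 x i1> %mask, i32 8)
  %res = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %mask, <8 x i32> %load, <8 x i32> %passthru, i32 8)
  ret <8 x i32> %res
}

; The scatter side needs no select: the mask and EVL go directly on the store.
define void @strided_scatter_sketch(<8 x i32> %val, ptr %base, <8 x i1> %mask) {
  call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> %val, ptr %base, i64 16, <8 x i1> %mask, i32 8)
  ret void
}

declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr, i64, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
declare void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32>, ptr, i64, <8 x i1>, i32)

For scalable vectors the tests above show the same pattern, except the EVL comes from a call to @llvm.vscale.i32() instead of a constant.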