diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ee45f730dc450..b4165763bc561 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11666,31 +11666,56 @@ SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
   auto *VPNode = cast<VPStridedLoadSDNode>(Op);
   // Check if the mask is known to be all ones
   SDValue Mask = VPNode->getMask();
+  SDValue VL = VPNode->getVectorLength();
+  SDValue Stride = VPNode->getStride();
   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
-
-  SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
-                                                   : Intrinsic::riscv_vlse_mask,
-                                        DL, XLenVT);
-  SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
-                              DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
-                              VPNode->getStride()};
-  if (!IsUnmasked) {
-    if (VT.isFixedLengthVector()) {
-      MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
-      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+  SDValue Result, Chain;
+
+  // TODO: We restrict this to unmasked loads currently in consideration of
+  // the complexity of handling all falses masks.
+  MVT ScalarVT = ContainerVT.getVectorElementType();
+  if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger() &&
+      !Subtarget.hasOptimizedZeroStrideLoad()) {
+    SDValue ScalarLoad =
+        DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, VPNode->getChain(),
+                       VPNode->getBasePtr(), ScalarVT, VPNode->getMemOperand());
+    Chain = ScalarLoad.getValue(1);
+    Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
+                              Subtarget);
+  } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT) &&
+             !Subtarget.hasOptimizedZeroStrideLoad()) {
+    SDValue ScalarLoad =
+        DAG.getLoad(ScalarVT, DL, VPNode->getChain(), VPNode->getBasePtr(),
+                    VPNode->getMemOperand());
+    Chain = ScalarLoad.getValue(1);
+    Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
+                              Subtarget);
+  } else {
+    SDValue IntID = DAG.getTargetConstant(
+        IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
+        XLenVT);
+    SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
+                                DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
+                                Stride};
+    if (!IsUnmasked) {
+      if (VT.isFixedLengthVector()) {
+        MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
+        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+      }
+      Ops.push_back(Mask);
+    }
+    Ops.push_back(VL);
+    if (!IsUnmasked) {
+      SDValue Policy =
+          DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
+      Ops.push_back(Policy);
     }
-    Ops.push_back(Mask);
-  }
-  Ops.push_back(VPNode->getVectorLength());
-  if (!IsUnmasked) {
-    SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
-    Ops.push_back(Policy);
-  }
-  SDValue Result =
-      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
-                              VPNode->getMemoryVT(), VPNode->getMemOperand());
-  SDValue Chain = Result.getValue(1);
+    Result =
+        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+                                VPNode->getMemoryVT(), VPNode->getMemOperand());
+    Chain = Result.getValue(1);
+  }
 
   if (VT.isFixedLengthVector())
     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 5e64e9fbc1a2f..980491a35f0ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -1,10 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
-; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN:   -verify-machineinstrs < %s | FileCheck %s \
+; RUN:   -check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
-; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+; RUN:   -verify-machineinstrs < %s | FileCheck %s \
+; RUN:   -check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s \
+; RUN:   -check-prefixes=CHECK,CHECK-RV32,CHECK-NOOPT
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s \
+; RUN:   -check-prefixes=CHECK,CHECK-RV64,CHECK-NOOPT
 
 declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)
 
@@ -626,3 +632,39 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 }
 
 declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, <33 x i1>, i32)
+
+; Test unmasked integer zero strided
+define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr, i32 zeroext %evl) {
+; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
+; CHECK-OPT:       # %bb.0:
+; CHECK-OPT-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-OPT-NEXT:    vlse8.v v8, (a0), zero
+; CHECK-OPT-NEXT:    ret
+;
+; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
+; CHECK-NOOPT:       # %bb.0:
+; CHECK-NOOPT-NEXT:    lbu a0, 0(a0)
+; CHECK-NOOPT-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NOOPT-NEXT:    vmv.v.x v8, a0
+; CHECK-NOOPT-NEXT:    ret
+  %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 %evl)
+  ret <4 x i8> %load
+}
+
+; Test unmasked float zero strided
+define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr, i32 zeroext %evl) {
+; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-OPT:       # %bb.0:
+; CHECK-OPT-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
+; CHECK-OPT-NEXT:    ret
+;
+; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-NOOPT:       # %bb.0:
+; CHECK-NOOPT-NEXT:    flh fa5, 0(a0)
+; CHECK-NOOPT-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NOOPT-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NOOPT-NEXT:    ret
+  %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 %evl)
+  ret <4 x half> %load
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 4d3bced0bcb50..775c272d21f70 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -1,10 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s \
-; RUN:   -check-prefixes=CHECK,CHECK-RV32
+; RUN:   -check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s \
-; RUN:   -check-prefixes=CHECK,CHECK-RV64
+; RUN:   -check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s \
+; RUN:   -check-prefixes=CHECK,CHECK-RV32,CHECK-NOOPT
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s \
+; RUN:   -check-prefixes=CHECK,CHECK-RV64,CHECK-NOOPT
 
 declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)
 
@@ -780,3 +786,39 @@ define <vscale x 17 x double> @strided_load_nxv17f64(ptr %ptr, i64 %stride, <vscale x 17 x i1> %mask
 declare <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr, i64, <vscale x 17 x i1>, i32)
 declare <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
 declare <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)
+
+; Test unmasked integer zero strided
+define <vscale x 1 x i8> @zero_strided_unmasked_vpload_nxv1i8_i8(ptr %ptr, i32 zeroext %evl) {
+; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
+; CHECK-OPT:       # %bb.0:
+; CHECK-OPT-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-OPT-NEXT:    vlse8.v v8, (a0), zero
+; CHECK-OPT-NEXT:    ret
+;
+; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
+; CHECK-NOOPT:       # %bb.0:
+; CHECK-NOOPT-NEXT:    lbu a0, 0(a0)
+; CHECK-NOOPT-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NOOPT-NEXT:    vmv.v.x v8, a0
+; CHECK-NOOPT-NEXT:    ret
+  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 0, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 1 x i8> %load
+}
+
+; Test unmasked float zero strided
+define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr, i32 zeroext %evl) {
+; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16:
+; CHECK-OPT:       # %bb.0:
+; CHECK-OPT-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
+; CHECK-OPT-NEXT:    ret
+;
+; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_nxv1f16:
+; CHECK-NOOPT:       # %bb.0:
+; CHECK-NOOPT-NEXT:    flh fa5, 0(a0)
+; CHECK-NOOPT-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NOOPT-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NOOPT-NEXT:    ret
+  %load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 1 x half> %load
+}