diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 920b06c7ba6ec..6ef857ca2e2b4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1347,6 +1347,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                            Custom);
 
+        setOperationAction({ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+                            ISD::EXPERIMENTAL_VP_STRIDED_STORE},
+                           VT, Custom);
+
         if (VT.getVectorElementType() == MVT::f16 &&
             !Subtarget.hasVInstructionsF16()) {
           setOperationAction(ISD::BITCAST, VT, Custom);
@@ -1410,10 +1414,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(
           {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
 
-      setOperationAction({ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
-                          ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
-                          ISD::VP_SCATTER},
-                         VT, Custom);
+      setOperationAction({ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom);
 
       setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                          ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 14cc3edffb8c1..47efa058df641 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -1,16 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+optimized-zero-stride-load \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+optimized-zero-stride-load \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
 
 declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)
 
@@ -278,6 +290,62 @@ define <8 x i64> @strided_vpload_v8i64(ptr %ptr, i32 signext %stride, <8 x i1> %
   ret <8 x i64> %load
 }
 
+declare <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr, i32, <2 x i1>, i32)
+
+define <2 x bfloat> @strided_vpload_v2bf16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
+  ret <2 x bfloat> %load
+}
+
+define <2 x bfloat> @strided_vpload_v2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2bf16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
+  ret <2 x bfloat> %load
+}
+
+declare <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr, i32, <4 x i1>, i32)
+
+define <4 x bfloat> @strided_vpload_v4bf16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
+  ret <4 x bfloat> %load
+}
+
+declare <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr, i32, <8 x i1>, i32)
+
+define <8 x bfloat> @strided_vpload_v8bf16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %load
+}
+
+define <8 x bfloat> @strided_vpload_v8bf16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8bf16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %load
+}
+
 declare <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr, i32, <2 x i1>, i32)
 
 define <2 x half> @strided_vpload_v2f16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -477,10 +545,10 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB40_2
+; CHECK-NEXT:    bltu a2, a4, .LBB45_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB40_2:
+; CHECK-NEXT:  .LBB45_2:
 ; CHECK-NEXT:    mul a4, a3, a1
 ; CHECK-NEXT:    add a4, a0, a4
 ; CHECK-NEXT:    addi a5, a2, -16
@@ -505,10 +573,10 @@ define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB41_2
+; CHECK-NEXT:    bltu a2, a4, .LBB46_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB41_2:
+; CHECK-NEXT:  .LBB46_2:
 ; CHECK-NEXT:    mul a4, a3, a1
 ; CHECK-NEXT:    add a4, a0, a4
 ; CHECK-NEXT:    addi a5, a2, -16
@@ -533,10 +601,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    li a5, 32
 ; CHECK-RV32-NEXT:    vmv1r.v v8, v0
 ; CHECK-RV32-NEXT:    mv a3, a4
-; CHECK-RV32-NEXT:    bltu a4, a5, .LBB42_2
+; CHECK-RV32-NEXT:    bltu a4, a5, .LBB47_2
 ; CHECK-RV32-NEXT:  # %bb.1:
 ; CHECK-RV32-NEXT:    li a3, 32
-; CHECK-RV32-NEXT:  .LBB42_2:
+; CHECK-RV32-NEXT:  .LBB47_2:
 ; CHECK-RV32-NEXT:    mul a6, a3, a2
 ; CHECK-RV32-NEXT:    addi a5, a4, -32
 ; CHECK-RV32-NEXT:    sltu a7, a4, a5
@@ -544,10 +612,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    and a7, a7, a5
 ; CHECK-RV32-NEXT:    li a5, 16
 ; CHECK-RV32-NEXT:    add a6, a1, a6
-; CHECK-RV32-NEXT:    bltu a7, a5, .LBB42_4
+; CHECK-RV32-NEXT:    bltu a7, a5, .LBB47_4
 ; CHECK-RV32-NEXT:  # %bb.3:
 ; CHECK-RV32-NEXT:    li a7, 16
-; CHECK-RV32-NEXT:  .LBB42_4:
+; CHECK-RV32-NEXT:  .LBB47_4:
 ; CHECK-RV32-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 4
 ; CHECK-RV32-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
@@ -556,10 +624,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    sltu a3, a3, a6
 ; CHECK-RV32-NEXT:    addi a3, a3, -1
 ; CHECK-RV32-NEXT:    and a3, a3, a6
-; CHECK-RV32-NEXT:    bltu a4, a5, .LBB42_6
+; CHECK-RV32-NEXT:    bltu a4, a5, .LBB47_6
 ; CHECK-RV32-NEXT:  # %bb.5:
 ; CHECK-RV32-NEXT:    li a4, 16
-; CHECK-RV32-NEXT:  .LBB42_6:
+; CHECK-RV32-NEXT:  .LBB47_6:
 ; CHECK-RV32-NEXT:    mul a5, a4, a2
 ; CHECK-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 2
@@ -583,10 +651,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    li a5, 32
 ; CHECK-RV64-NEXT:    vmv1r.v v8, v0
 ; CHECK-RV64-NEXT:    mv a4, a3
-; CHECK-RV64-NEXT:    bltu a3, a5, .LBB42_2
+; CHECK-RV64-NEXT:    bltu a3, a5, .LBB47_2
 ; CHECK-RV64-NEXT:  # %bb.1:
 ; CHECK-RV64-NEXT:    li a4, 32
-; CHECK-RV64-NEXT:  .LBB42_2:
+; CHECK-RV64-NEXT:  .LBB47_2:
 ; CHECK-RV64-NEXT:    mul a6, a4, a2
 ; CHECK-RV64-NEXT:    addi a5, a3, -32
 ; CHECK-RV64-NEXT:    sltu a7, a3, a5
@@ -594,10 +662,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    and a7, a7, a5
 ; CHECK-RV64-NEXT:    li a5, 16
 ; CHECK-RV64-NEXT:    add a6, a1, a6
-; CHECK-RV64-NEXT:    bltu a7, a5, .LBB42_4
+; CHECK-RV64-NEXT:    bltu a7, a5, .LBB47_4
 ; CHECK-RV64-NEXT:  # %bb.3:
 ; CHECK-RV64-NEXT:    li a7, 16
-; CHECK-RV64-NEXT:  .LBB42_4:
+; CHECK-RV64-NEXT:  .LBB47_4:
 ; CHECK-RV64-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 4
 ; CHECK-RV64-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
@@ -606,10 +674,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    sltu a4, a4, a6
 ; CHECK-RV64-NEXT:    addi a4, a4, -1
 ; CHECK-RV64-NEXT:    and a4, a4, a6
-; CHECK-RV64-NEXT:    bltu a3, a5, .LBB42_6
+; CHECK-RV64-NEXT:    bltu a3, a5, .LBB47_6
 ; CHECK-RV64-NEXT:  # %bb.5:
 ; CHECK-RV64-NEXT:    li a3, 16
-; CHECK-RV64-NEXT:  .LBB42_6:
+; CHECK-RV64-NEXT:  .LBB47_6:
 ; CHECK-RV64-NEXT:    mul a5, a3, a2
 ; CHECK-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 2
@@ -659,12 +727,19 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
-; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
-; CHECK-NO-OPT:       # %bb.0:
-; CHECK-NO-OPT-NEXT:    flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
-; CHECK-NO-OPT-NEXT:    vfmv.v.f v8, fa5
-; CHECK-NO-OPT-NEXT:    ret
+; CHECK-NO-OPT-ZVFH-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-NO-OPT-ZVFH:       # %bb.0:
+; CHECK-NO-OPT-ZVFH-NEXT:    flh fa5, 0(a0)
+; CHECK-NO-OPT-ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NO-OPT-ZVFH-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NO-OPT-ZVFH-NEXT:    ret
+;
+; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-NO-OPT-ZVFHMIN:       # %bb.0:
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    lh a0, 0(a0)
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    ret
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
   ret <4 x half> %load
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll
index ddd86c3082021..7ca329835b7ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll
@@ -1,8 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
 
@@ -232,6 +238,52 @@ define void @strided_vpstore_v8i64(<8 x i64> %val, ptr %ptr, i32 signext %stride
   ret void
 }
 
+declare void @llvm.experimental.vp.strided.store.v2bf16.p0.i32(<2 x bfloat>, ptr, i32, <2 x i1>, i32)
+
+define void @strided_vpstore_v2bf16(<2 x bfloat> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v2bf16.p0.i32(<2 x bfloat> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.v4bf16.p0.i32(<4 x bfloat>, ptr, i32, <4 x i1>, i32)
+
+define void @strided_vpstore_v4bf16(<4 x bfloat> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v4bf16.p0.i32(<4 x bfloat> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat>, ptr, i32, <8 x i1>, i32)
+
+define void @strided_vpstore_v8bf16(<8 x bfloat> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @strided_vpstore_v8bf16_unit_stride(<8 x bfloat> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_v8bf16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.v2f16.p0.i32(<2 x half>, ptr, i32, <2 x i1>, i32)
 
 define void @strided_vpstore_v2f16(<2 x half> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -409,10 +461,10 @@ define void @strided_store_v32f64(<32 x double> %v, ptr %ptr, i32 signext %strid
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB34_2
+; CHECK-NEXT:    bltu a2, a4, .LBB38_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB34_2:
+; CHECK-NEXT:  .LBB38_2:
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
 ; CHECK-NEXT:    mul a3, a3, a1
@@ -435,10 +487,10 @@ define void @strided_store_v32f64_allones_mask(<32 x double> %v, ptr %ptr, i32 s
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB35_2
+; CHECK-NEXT:    bltu a2, a4, .LBB39_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB35_2:
+; CHECK-NEXT:  .LBB39_2:
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-NEXT:    vsse64.v v8, (a0), a1
 ; CHECK-NEXT:    mul a3, a3, a1