diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b6dd0c11bbf9..613cb01667d04 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1339,9 +1339,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                           ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
                          VT, Custom);
 
-      // FIXME: mload, mstore, mgather, mscatter, vp_gather/scatter can be
+      // FIXME: mload, mstore, vp_gather/scatter can be
       // hoisted to here.
-      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+      setOperationAction({ISD::LOAD, ISD::STORE, ISD::MGATHER, ISD::MSCATTER},
+                         VT, Custom);
       setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                           ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                           ISD::EXPERIMENTAL_VP_STRIDED_STORE},
@@ -1408,8 +1409,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
                          Custom);
 
-      setOperationAction(
-          {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
+      setOperationAction({ISD::MLOAD, ISD::MSTORE}, VT, Custom);
 
       setOperationAction({ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom);
 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 29a6c68a6c585..7d1e9007adc0d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -270,7 +270,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
     if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
       return false;
 
-    return TLI->isLegalElementTypeForRVV(ElemType);
+    // TODO: Move bf16/f16 support into isLegalElementTypeForRVV
+    return TLI->isLegalElementTypeForRVV(ElemType) ||
+           (DataTypeVT.getVectorElementType() == MVT::bf16 &&
+            ST->hasVInstructionsBF16Minimal()) ||
+           (DataTypeVT.getVectorElementType() == MVT::f16 &&
+            ST->hasVInstructionsF16Minimal());
   }
 
   bool isLegalMaskedGather(Type *DataType, Align Alignment) {
diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll
index b0bbb23a56f3f..ae913456e344c 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh,+zvfbfmin -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfbfmin -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s
 ; Check that we don't crash querying costs when vectors are not enabled.
; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 @@ -14,6 +15,12 @@ define i32 @masked_gather() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32BF16 = call <32 x bfloat> @llvm.masked.gather.v32bf16.v32p0(<32 x ptr> undef, i32 2, <32 x i1> undef, <32 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16BF16 = call <16 x bfloat> @llvm.masked.gather.v16bf16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8BF16 = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4BF16 = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x bfloat> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32F16 = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 2, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef) @@ -51,6 +58,12 @@ define i32 @masked_gather() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32.u = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32.u = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32BF16.u = call <32 x bfloat> @llvm.masked.gather.v32bf16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16BF16.u = call <16 x bfloat> @llvm.masked.gather.v16bf16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8BF16.u = call <8 x 
bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4BF16.u = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2BF16.u = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16.u = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x bfloat> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x half> undef) @@ -85,6 +98,13 @@ define i32 @masked_gather() { %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef) %V1F32 = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x float> undef) + %V32BF16 = call <32 x bfloat> @llvm.masked.gather.v32bf16.v32p0(<32 x ptr> undef, i32 2, <32 x i1> undef, <32 x bfloat> undef) + %V16BF16 = call <16 x bfloat> @llvm.masked.gather.v16bf16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x bfloat> undef) + %V8BF16 = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x bfloat> undef) + %V4BF16 = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x bfloat> undef) + %V2BF16 = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x bfloat> undef) + %V1BF16 = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x bfloat> undef) + %V32F16 = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 2, <32 x i1> undef, <32 x half> undef) %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef) %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef) @@ -130,6 +150,13 @@ define i32 @masked_gather() { %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x float> undef) %V1F32.u = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x float> undef) + %V32BF16.u = call <32 x bfloat> @llvm.masked.gather.v32bf16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x bfloat> undef) + %V16BF16.u = call <16 x bfloat> @llvm.masked.gather.v16bf16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x bfloat> undef) + %V8BF16.u = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x bfloat> undef) + %V4BF16.u = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x bfloat> undef) + %V2BF16.u = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, 
<2 x bfloat> undef) + %V1BF16.u = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x bfloat> undef) + %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x half> undef) %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x half> undef) %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x half> undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll index 96f5153f05494..9158c2c829135 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh,+zvfbfmin -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfbfmin -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s ; Check that we don't crash querying costs when vectors are not enabled. ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 @@ -14,6 +15,12 @@ define i32 @masked_scatter() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 4, <1 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v32bf16.v32p0(<32 x bfloat> undef, <32 x ptr> undef, i32 2, <32 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16bf16.v16p0(<16 x bfloat> undef, <16 x ptr> undef, i32 2, <16 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> undef, <8 x ptr> undef, i32 2, <8 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> undef, <4 x ptr> undef, i32 2, <4 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat> undef, <2 x ptr> undef, i32 2, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat> undef, <1 x ptr> undef, i32 2, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 2, <32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef) ; CHECK-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef) @@ -51,6 +58,12 @@ define i32 @masked_scatter() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 2, <4 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 2, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 2, <1 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32bf16.v32p0(<32 x bfloat> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16bf16.v16p0(<16 x bfloat> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) @@ -85,6 +98,13 @@ define i32 @masked_scatter() { call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef) call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 4, <1 x i1> undef) + call void @llvm.masked.scatter.v32bf16.v32p0(<32 x bfloat> undef, <32 x ptr> undef, i32 2, <32 x i1> undef) + call void @llvm.masked.scatter.v16bf16.v16p0(<16 x bfloat> undef, <16 x ptr> undef, i32 2, <16 x i1> undef) + call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> undef, <8 x ptr> undef, i32 2, <8 x i1> undef) + call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> undef, <4 x ptr> undef, i32 2, <4 x i1> undef) + call void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat> undef, <2 x ptr> undef, i32 2, <2 x i1> undef) + call void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat> undef, <1 x ptr> undef, i32 2, <1 x i1> undef) + call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 2, <32 x i1> undef) call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef) call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef) @@ -130,6 +150,13 @@ define i32 
@masked_scatter() { call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 2, <2 x i1> undef) call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 2, <1 x i1> undef) + call void @llvm.masked.scatter.v32bf16.v32p0(<32 x bfloat> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) + call void @llvm.masked.scatter.v16bf16.v16p0(<16 x bfloat> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll index 2f35f72ca8c5c..8504b4cee8a6f 100644 --- a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll +++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,GENERIC,GENERICZVFH -; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,GENERIC,GENERICZVFHMIN +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256 ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED @@ -166,23 +166,14 @@ define void @masked_gather_aligned() { } define void @masked_gather_aligned_f16() { -; GENERICZVFH-LABEL: 'masked_gather_aligned_f16' -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call @llvm.masked.gather.nxv32f16.nxv32p0( undef, i32 2, undef, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call @llvm.masked.gather.nxv16f16.nxv16p0( undef, i32 2, undef, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call @llvm.masked.gather.nxv8f16.nxv8p0( undef, i32 2, undef, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call @llvm.masked.gather.nxv4f16.nxv4p0( undef, i32 2, undef, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call @llvm.masked.gather.nxv2f16.nxv2p0( undef, i32 2, undef, undef) -; GENERICZVFH-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V1F16 = call @llvm.masked.gather.nxv1f16.nxv1p0( undef, i32 2, undef, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; GENERICZVFHMIN-LABEL: 'masked_gather_aligned_f16' -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V32F16 = call @llvm.masked.gather.nxv32f16.nxv32p0( undef, i32 2, undef, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V16F16 = call @llvm.masked.gather.nxv16f16.nxv16p0( undef, i32 2, undef, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V8F16 = call @llvm.masked.gather.nxv8f16.nxv8p0( undef, i32 2, undef, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V4F16 = call @llvm.masked.gather.nxv4f16.nxv4p0( undef, i32 2, undef, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V2F16 = call @llvm.masked.gather.nxv2f16.nxv2p0( undef, i32 2, undef, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V1F16 = call @llvm.masked.gather.nxv1f16.nxv1p0( undef, i32 2, undef, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; GENERIC-LABEL: 'masked_gather_aligned_f16' +; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call @llvm.masked.gather.nxv32f16.nxv32p0( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call @llvm.masked.gather.nxv16f16.nxv16p0( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call @llvm.masked.gather.nxv8f16.nxv8p0( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call @llvm.masked.gather.nxv4f16.nxv4p0( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call @llvm.masked.gather.nxv2f16.nxv2p0( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call @llvm.masked.gather.nxv1f16.nxv1p0( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; MAX256-LABEL: 'masked_gather_aligned_f16' ; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call @llvm.masked.gather.nxv32f16.nxv32p0( undef, i32 2, undef, undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll index 8e917a4296661..0e3c43a228ef4 100644 --- a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll +++ b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,GENERIC,GENERICZVFH -; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,GENERIC,GENERICZVFHMIN +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s 
--check-prefixes=CHECK,MAX256 ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED @@ -166,23 +166,14 @@ define void @masked_scatter_aligned() { } define void @masked_scatter_aligned_f16() { -; GENERICZVFH-LABEL: 'masked_scatter_aligned_f16' -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0( undef, undef, i32 2, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0( undef, undef, i32 2, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0( undef, undef, i32 2, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0( undef, undef, i32 2, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0( undef, undef, i32 2, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0( undef, undef, i32 2, undef) -; GENERICZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; GENERICZVFHMIN-LABEL: 'masked_scatter_aligned_f16' -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0( undef, undef, i32 2, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0( undef, undef, i32 2, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0( undef, undef, i32 2, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0( undef, undef, i32 2, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0( undef, undef, i32 2, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0( undef, undef, i32 2, undef) -; GENERICZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; GENERIC-LABEL: 'masked_scatter_aligned_f16' +; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; MAX256-LABEL: 
'masked_scatter_aligned_f16' ; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0( undef, undef, i32 2, undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 5802f45d311b3..5a7b512e4ea5f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -1,12 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN + +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>) @@ -6874,31 +6883,31 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ret <8 x i64> %v } -declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>) +declare <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x bfloat>) -define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) { -; RV32V-LABEL: mgather_v1f16: +define <1 x bfloat> @mgather_v1bf16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x bfloat> %passthru) { +; RV32V-LABEL: mgather_v1bf16: ; RV32V: # %bb.0: ; RV32V-NEXT: vsetivli 
zero, 1, e16, mf4, ta, mu ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t ; RV32V-NEXT: vmv1r.v v8, v9 ; RV32V-NEXT: ret ; -; RV64V-LABEL: mgather_v1f16: +; RV64V-LABEL: mgather_v1bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t ; RV64V-NEXT: vmv1r.v v8, v9 ; RV64V-NEXT: ret ; -; RV32ZVE32F-LABEL: mgather_v1f16: +; RV32ZVE32F-LABEL: mgather_v1bf16: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t ; RV32ZVE32F-NEXT: vmv1r.v v8, v9 ; RV32ZVE32F-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_v1f16: +; RV64ZVE32F-LABEL: mgather_v1bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vfirst.m a1, v0 @@ -6908,35 +6917,35 @@ define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passt ; RV64ZVE32F-NEXT: vle16.v v8, (a0) ; RV64ZVE32F-NEXT: .LBB58_2: # %else ; RV64ZVE32F-NEXT: ret - %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru) - ret <1 x half> %v + %v = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x bfloat> %passthru) + ret <1 x bfloat> %v } -declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>) +declare <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x bfloat>) -define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) { -; RV32V-LABEL: mgather_v2f16: +define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %passthru) { +; RV32V-LABEL: mgather_v2bf16: ; RV32V: # %bb.0: ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t ; RV32V-NEXT: vmv1r.v v8, v9 ; RV32V-NEXT: ret ; -; RV64V-LABEL: mgather_v2f16: +; RV64V-LABEL: mgather_v2bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t ; RV64V-NEXT: vmv1r.v v8, v9 ; RV64V-NEXT: ret ; -; RV32ZVE32F-LABEL: mgather_v2f16: +; RV32ZVE32F-LABEL: mgather_v2bf16: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t ; RV32ZVE32F-NEXT: vmv1r.v v8, v9 ; RV32ZVE32F-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_v2f16: +; RV64ZVE32F-LABEL: mgather_v2bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 @@ -6948,40 +6957,40 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt ; RV64ZVE32F-NEXT: .LBB59_2: # %else2 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load -; RV64ZVE32F-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2 ; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1 -; RV64ZVE32F-NEXT: flh fa5, 0(a1) +; RV64ZVE32F-NEXT: lh a0, 0(a1) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret - %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru) - ret <2 x half> %v + %v = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> 
%ptrs, i32 2, <2 x i1> %m, <2 x bfloat> %passthru) + ret <2 x bfloat> %v } -declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>) +declare <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x bfloat>) -define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) { -; RV32-LABEL: mgather_v4f16: +define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %passthru) { +; RV32-LABEL: mgather_v4bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; -; RV64V-LABEL: mgather_v4f16: +; RV64V-LABEL: mgather_v4bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t ; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_v4f16: +; RV64ZVE32F-LABEL: mgather_v4bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -7000,110 +7009,110 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_2 ; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_3 ; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB60_4 ; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7 ; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 ; RV64ZVE32F-NEXT: ret - %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru) - ret <4 x half> %v + %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x bfloat> %passthru) + ret <4 x bfloat> %v } -define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) { -; RV32-LABEL: mgather_truemask_v4f16: +define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) { +; RV32-LABEL: mgather_truemask_v4bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-NEXT: vluxei32.v v9, (zero), v8 ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; -; RV64V-LABEL: mgather_truemask_v4f16: +; RV64V-LABEL: mgather_truemask_v4bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64V-NEXT: vluxei64.v v10, (zero), v8 
; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_truemask_v4f16: +; RV64ZVE32F-LABEL: mgather_truemask_v4bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 0(a0) ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: ld a3, 16(a0) ; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a1) -; RV64ZVE32F-NEXT: flh fa4, 0(a2) -; RV64ZVE32F-NEXT: flh fa3, 0(a3) -; RV64ZVE32F-NEXT: flh fa2, 0(a0) +; RV64ZVE32F-NEXT: lh a1, 0(a1) +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: lh a3, 0(a3) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.v.f v8, fa5 -; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 -; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 -; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa2 +; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret - %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru) - ret <4 x half> %v + %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x bfloat> %passthru) + ret <4 x bfloat> %v } -define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) { -; RV32-LABEL: mgather_falsemask_v4f16: +define <4 x bfloat> @mgather_falsemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) { +; RV32-LABEL: mgather_falsemask_v4bf16: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; -; RV64V-LABEL: mgather_falsemask_v4f16: +; RV64V-LABEL: mgather_falsemask_v4bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_falsemask_v4f16: +; RV64ZVE32F-LABEL: mgather_falsemask_v4bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ret - %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru) - ret <4 x half> %v + %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x bfloat> %passthru) + ret <4 x bfloat> %v } -declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>) +declare <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x bfloat>) -define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) { -; RV32-LABEL: mgather_v8f16: +define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %passthru) { +; RV32-LABEL: mgather_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; -; RV64V-LABEL: mgather_v8f16: +; RV64V-LABEL: mgather_v8bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_v8f16: +; RV64ZVE32F-LABEL: mgather_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -7134,73 +7143,73 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, 
fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_2 ; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_3 ; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_4 ; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_5 ; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_6 ; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_7 ; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB63_8 ; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19 ; RV64ZVE32F-NEXT: ld a0, 56(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 ; RV64ZVE32F-NEXT: ret - %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) - ret <8 x half> %v + %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) + ret <8 x bfloat> %v } -define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { -; RV32-LABEL: mgather_baseidx_v8i8_v8f16: +define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) { +; RV32-LABEL: mgather_baseidx_v8i8_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vsext.vf4 v10, v8 @@ -7210,7 +7219,7 @@ define <8 x half> 
@mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; -; RV64V-LABEL: mgather_baseidx_v8i8_v8f16: +; RV64V-LABEL: mgather_baseidx_v8i8_v8bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64V-NEXT: vsext.vf8 v12, v8 @@ -7220,7 +7229,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f16: +; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -7230,9 +7239,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB64_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_4 @@ -7242,9 +7251,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB64_4: # %else2 @@ -7269,9 +7278,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14 @@ -7289,9 +7298,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -7302,9 +7311,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -7314,9 +7323,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; 
RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -7326,9 +7335,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -7339,20 +7348,20 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret - %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs - %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) - ret <8 x half> %v + %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs + %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) + ret <8 x bfloat> %v } -define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { -; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16: +define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) { +; RV32-LABEL: mgather_baseidx_sext_v8i8_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vsext.vf4 v10, v8 @@ -7362,7 +7371,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; -; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16: +; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64V-NEXT: vsext.vf8 v12, v8 @@ -7372,7 +7381,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f16: +; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -7382,9 +7391,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB65_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_4 @@ -7394,9 +7403,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB65_4: # %else2 @@ -7421,9 +7430,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14 @@ -7441,9 +7450,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -7454,9 +7463,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -7466,9 +7475,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -7478,9 +7487,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -7491,21 +7500,21 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; 
RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i16> - %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs - %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) - ret <8 x half> %v + %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs + %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) + ret <8 x bfloat> %v } -define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { -; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16: +define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) { +; RV32-LABEL: mgather_baseidx_zext_v8i8_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vwaddu.vv v10, v8, v8 @@ -7514,7 +7523,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; -; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16: +; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64V-NEXT: vwaddu.vv v10, v8, v8 @@ -7523,7 +7532,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f16: +; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -7534,9 +7543,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB66_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_4 @@ -7547,9 +7556,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB66_4: # %else2 @@ -7575,9 +7584,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14 @@ -7596,9 +7605,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh 
a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -7610,9 +7619,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -7623,9 +7632,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -7636,9 +7645,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -7650,21 +7659,21 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a1, a1, 255 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> - %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs - %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) - ret <8 x half> %v + %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs + %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) + ret <8 x bfloat> %v } -define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) { -; RV32-LABEL: mgather_baseidx_v8f16: +define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) { +; RV32-LABEL: mgather_baseidx_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV32-NEXT: vwadd.vv v10, v8, v8 @@ -7672,7 +7681,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; -; RV64V-LABEL: mgather_baseidx_v8f16: +; RV64V-LABEL: mgather_baseidx_v8bf16: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 8, 
e64, m4, ta, ma ; RV64V-NEXT: vsext.vf4 v12, v8 @@ -7682,7 +7691,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mgather_baseidx_v8f16: +; RV64ZVE32F-LABEL: mgather_baseidx_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -7693,8 +7702,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB67_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_4 @@ -7704,8 +7713,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB67_4: # %else2 @@ -7730,8 +7739,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB67_9: # %else14 @@ -7749,8 +7758,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -7761,8 +7770,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -7772,8 +7781,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_8 @@ -7782,8 +7791,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: 
vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -7794,12 +7803,1635 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs + %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) + ret <8 x bfloat> %v +} + +declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>) + +define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) { +; RV32V-LABEL: mgather_v1f16: +; RV32V: # %bb.0: +; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32V-NEXT: vmv1r.v v8, v9 +; RV32V-NEXT: ret +; +; RV64V-LABEL: mgather_v1f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t +; RV64V-NEXT: vmv1r.v v8, v9 +; RV64V-NEXT: ret +; +; RV32ZVE32F-LABEL: mgather_v1f16: +; RV32ZVE32F: # %bb.0: +; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu +; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32ZVE32F-NEXT: vmv1r.v v8, v9 +; RV32ZVE32F-NEXT: ret +; +; RV64ZVE32F-LABEL: mgather_v1f16: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vfirst.m a1, v0 +; RV64ZVE32F-NEXT: bnez a1, .LBB68_2 +; RV64ZVE32F-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vle16.v v8, (a0) +; RV64ZVE32F-NEXT: .LBB68_2: # %else +; RV64ZVE32F-NEXT: ret + %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru) + ret <1 x half> %v +} + +declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>) + +define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) { +; RV32V-LABEL: mgather_v2f16: +; RV32V: # %bb.0: +; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32V-NEXT: vmv1r.v v8, v9 +; RV32V-NEXT: ret +; +; RV64V-LABEL: mgather_v2f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t +; RV64V-NEXT: vmv1r.v v8, v9 +; RV64V-NEXT: ret +; +; RV32ZVE32F-LABEL: mgather_v2f16: +; RV32ZVE32F: # %bb.0: +; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu +; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32ZVE32F-NEXT: vmv1r.v v8, v9 +; RV32ZVE32F-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mgather_v2f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB69_3 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_4 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else2 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB69_3: # %cond.load +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; 
RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %cond.load1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v2f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB69_3 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %cond.load +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a1) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru) + ret <2 x half> %v +} + +declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>) + +define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) { +; RV32-LABEL: mgather_v4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_v4f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64V-NEXT: vmv1r.v v8, v10 +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mgather_v4f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_5 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_3: # %else5 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_8 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else8 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB70_5: # %cond.load +; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %cond.load1 +; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; 
RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_3 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_4 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v4f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_5 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else5 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_8 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %cond.load +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_3 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru) + ret <4 x half> %v +} + +define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) { +; RV32-LABEL: mgather_truemask_v4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; 
RV64V-LABEL: mgather_truemask_v4f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64V-NEXT: vluxei64.v v10, (zero), v8 +; RV64V-NEXT: vmv1r.v v8, v10 +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: ld a1, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a3, 16(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1) +; RV64ZVE32F-ZVFH-NEXT: flh fa4, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: flh fa3, 0(a3) +; RV64ZVE32F-ZVFH-NEXT: flh fa2, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.v.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa4 +; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa3 +; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa2 +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mgather_truemask_v4f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 16(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a3, 0(a3) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru) + ret <4 x half> %v +} + +define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) { +; RV32-LABEL: mgather_falsemask_v4f16: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_falsemask_v4f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vmv1r.v v8, v10 +; RV64V-NEXT: ret +; +; RV64ZVE32F-LABEL: mgather_falsemask_v4f16: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: ret + %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru) + ret <4 x half> %v +} + +declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>) + +define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) { +; RV32-LABEL: mgather_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t +; RV64V-NEXT: vmv.v.v v8, v12 +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mgather_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_9 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_10 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_2: # %else2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_3: # %else5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_12 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_4: # %else8 +; 
RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_5: # %else11 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_6: # %else14 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_15 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_7: # %else17 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB73_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_8: # %else20 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB73_9: # %cond.load +; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_2 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_10: # %cond.load1 +; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_3 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_11: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_4 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_12: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: ld a2, 24(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_5 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_13: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: ld a2, 32(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_14: # %cond.load13 +; RV64ZVE32F-ZVFH-NEXT: ld a2, 40(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_15: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 6 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB73_8 +; RV64ZVE32F-ZVFH-NEXT: .LBB73_16: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: ld a0, 56(a0) +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; 
RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_9 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_10 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_2: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_3: # %else5 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_12 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_4: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_5: # %else11 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_6: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_15 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_7: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_8: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_9: # %cond.load +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_3 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 24(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_5 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 32(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 40(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: 
ld a2, 48(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_8 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 56(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) + ret <8 x half> %v +} + +define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { +; RV32-LABEL: mgather_baseidx_v8i8_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsext.vf4 v10, v8 +; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_baseidx_v8i8_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v8 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t +; RV64V-NEXT: vmv.v.v v8, v9 +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_2: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_4 +; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_4: # %else2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_12 +; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_6: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_7: # %else11 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_8: # %cond.load13 +; 
RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_9: # %else14 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_15 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB74_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_11: # %else20 +; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB74_12: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_13: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_14: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_8 +; RV64ZVE32F-ZVFH-NEXT: j .LBB74_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_15: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB74_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_16: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, 
e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_4 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_4: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_12 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else11 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_8: # %cond.load13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_9: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_15 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB74_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_11: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_12: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; 
RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_13: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_8 +; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB74_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs + %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) + ret <8 x half> %v +} + +define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { +; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsext.vf4 v10, v8 +; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16: +; RV64V: # 
%bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v8 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t +; RV64V-NEXT: vmv.v.v v8, v9 +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_2: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_4 +; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_4: # %else2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_12 +; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_6: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_7: # %else11 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_8: # %cond.load13 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_9: # %else14 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_15 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB75_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_11: # %else20 +; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB75_12: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: 
vfmv.s.f v11, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_13: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_14: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_8 +; RV64ZVE32F-ZVFH-NEXT: j .LBB75_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_15: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB75_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_16: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_4 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; 
RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_4: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_12 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else11 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_8: # %cond.load13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_9: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_15 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB75_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_11: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_12: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_13: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; 
RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_8 +; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB75_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %eidxs = sext <8 x i8> %idxs to <8 x i16> + %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs + %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) + ret <8 x half> %v +} + +define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { +; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwaddu.vv v10, v8, v8 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t +; RV64V-NEXT: vmv.v.v v8, v9 +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_2: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_4 +; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; 
RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_4: # %else2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_12 +; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_6: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_7: # %else11 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_8: # %cond.load13 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_9: # %else14 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_15 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB76_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_11: # %else20 +; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB76_12: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_13: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_14: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; 
RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_8 +; RV64ZVE32F-ZVFH-NEXT: j .LBB76_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_15: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB76_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_4 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_4: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_12 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else8 +; 
RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else11 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_8: # %cond.load13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_9: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_15 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB76_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_12: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_8 +; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: 
lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB76_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %eidxs = zext <8 x i8> %idxs to <8 x i16> + %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs + %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) + ret <8 x half> %v +} + +define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) { +; RV32-LABEL: mgather_baseidx_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-NEXT: vwadd.vv v10, v8, v8 +; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_baseidx_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v12, v8 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t +; RV64V-NEXT: vmv.v.v v8, v9 +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_2: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_4 +; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_4: # %else2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_12 +; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, 
.LBB77_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_6: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_7: # %else11 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_8: # %cond.load13 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_9: # %else14 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_15 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB77_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_11: # %else20 +; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB77_12: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_13: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_14: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_8 +; RV64ZVE32F-ZVFH-NEXT: j .LBB77_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_15: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB77_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_16: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, 
m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_4 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_4: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_12 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else11 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_8: # %cond.load13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_9: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_15 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB77_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_11: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_12: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 +; 
RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_13: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_8 +; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB77_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: ret %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) ret <8 x half> %v @@ -7833,11 +9465,11 @@ define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %pas ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vfirst.m a1, v0 -; RV64ZVE32F-NEXT: bnez a1, .LBB68_2 +; RV64ZVE32F-NEXT: bnez a1, .LBB78_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vle32.v v8, (a0) -; RV64ZVE32F-NEXT: .LBB68_2: # %else +; RV64ZVE32F-NEXT: .LBB78_2: # %else ; RV64ZVE32F-NEXT: ret %v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru) ret <1 x float> %v @@ -7872,19 +9504,19 @@ define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %pas ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: bnez a3, .LBB69_3 +; RV64ZVE32F-NEXT: bnez a3, .LBB79_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB69_4 -; RV64ZVE32F-NEXT: .LBB69_2: # %else2 +; RV64ZVE32F-NEXT: bnez a2, .LBB79_4 
+; RV64ZVE32F-NEXT: .LBB79_2: # %else2 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB69_3: # %cond.load +; RV64ZVE32F-NEXT: .LBB79_3: # %cond.load ; RV64ZVE32F-NEXT: flw fa5, 0(a0) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a2, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB69_2 -; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1 +; RV64ZVE32F-NEXT: beqz a2, .LBB79_2 +; RV64ZVE32F-NEXT: .LBB79_4: # %cond.load1 ; RV64ZVE32F-NEXT: flw fa5, 0(a1) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 @@ -7917,26 +9549,26 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: bnez a2, .LBB70_5 +; RV64ZVE32F-NEXT: bnez a2, .LBB80_5 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB70_6 -; RV64ZVE32F-NEXT: .LBB70_2: # %else2 +; RV64ZVE32F-NEXT: bnez a2, .LBB80_6 +; RV64ZVE32F-NEXT: .LBB80_2: # %else2 ; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB70_7 -; RV64ZVE32F-NEXT: .LBB70_3: # %else5 +; RV64ZVE32F-NEXT: bnez a2, .LBB80_7 +; RV64ZVE32F-NEXT: .LBB80_3: # %else5 ; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: bnez a1, .LBB70_8 -; RV64ZVE32F-NEXT: .LBB70_4: # %else8 +; RV64ZVE32F-NEXT: bnez a1, .LBB80_8 +; RV64ZVE32F-NEXT: .LBB80_4: # %else8 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB70_5: # %cond.load +; RV64ZVE32F-NEXT: .LBB80_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB70_2 -; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1 +; RV64ZVE32F-NEXT: beqz a2, .LBB80_2 +; RV64ZVE32F-NEXT: .LBB80_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -7944,16 +9576,16 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB70_3 -; RV64ZVE32F-NEXT: .LBB70_7: # %cond.load4 +; RV64ZVE32F-NEXT: beqz a2, .LBB80_3 +; RV64ZVE32F-NEXT: .LBB80_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: beqz a1, .LBB70_4 -; RV64ZVE32F-NEXT: .LBB70_8: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a1, .LBB80_4 +; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load7 ; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -8038,38 +9670,38 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: bnez a2, .LBB73_9 +; RV64ZVE32F-NEXT: bnez a2, .LBB83_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB73_10 -; RV64ZVE32F-NEXT: .LBB73_2: # %else2 +; RV64ZVE32F-NEXT: bnez a2, .LBB83_10 +; RV64ZVE32F-NEXT: .LBB83_2: # %else2 ; RV64ZVE32F-NEXT: andi a2, a1, 4 
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_11 -; RV64ZVE32F-NEXT: .LBB73_3: # %else5 +; RV64ZVE32F-NEXT: bnez a2, .LBB83_11 +; RV64ZVE32F-NEXT: .LBB83_3: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB73_12 -; RV64ZVE32F-NEXT: .LBB73_4: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB83_12 +; RV64ZVE32F-NEXT: .LBB83_4: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB73_13 -; RV64ZVE32F-NEXT: .LBB73_5: # %else11 +; RV64ZVE32F-NEXT: bnez a2, .LBB83_13 +; RV64ZVE32F-NEXT: .LBB83_5: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB73_14 -; RV64ZVE32F-NEXT: .LBB73_6: # %else14 +; RV64ZVE32F-NEXT: bnez a2, .LBB83_14 +; RV64ZVE32F-NEXT: .LBB83_6: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB73_15 -; RV64ZVE32F-NEXT: .LBB73_7: # %else17 +; RV64ZVE32F-NEXT: bnez a2, .LBB83_15 +; RV64ZVE32F-NEXT: .LBB83_7: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB73_16 -; RV64ZVE32F-NEXT: .LBB73_8: # %else20 +; RV64ZVE32F-NEXT: bnez a1, .LBB83_16 +; RV64ZVE32F-NEXT: .LBB83_8: # %else20 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load +; RV64ZVE32F-NEXT: .LBB83_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB73_2 -; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1 +; RV64ZVE32F-NEXT: beqz a2, .LBB83_2 +; RV64ZVE32F-NEXT: .LBB83_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -8077,48 +9709,48 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB73_3 -; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4 +; RV64ZVE32F-NEXT: beqz a2, .LBB83_3 +; RV64ZVE32F-NEXT: .LBB83_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB73_4 -; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB83_4 +; RV64ZVE32F-NEXT: .LBB83_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB73_5 -; RV64ZVE32F-NEXT: .LBB73_13: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB83_5 +; RV64ZVE32F-NEXT: .LBB83_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB73_6 -; RV64ZVE32F-NEXT: .LBB73_14: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a2, .LBB83_6 +; RV64ZVE32F-NEXT: .LBB83_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5 ; RV64ZVE32F-NEXT: andi a2, a1, 64 -; 
RV64ZVE32F-NEXT: beqz a2, .LBB73_7 -; RV64ZVE32F-NEXT: .LBB73_15: # %cond.load16 +; RV64ZVE32F-NEXT: beqz a2, .LBB83_7 +; RV64ZVE32F-NEXT: .LBB83_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB73_8 -; RV64ZVE32F-NEXT: .LBB73_16: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a1, .LBB83_8 +; RV64ZVE32F-NEXT: .LBB83_16: # %cond.load19 ; RV64ZVE32F-NEXT: ld a0, 56(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -8154,7 +9786,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -8162,9 +9794,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: .LBB74_2: # %else +; RV64ZVE32F-NEXT: .LBB84_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8176,23 +9808,23 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 -; RV64ZVE32F-NEXT: .LBB74_4: # %else2 +; RV64ZVE32F-NEXT: .LBB84_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_13 -; RV64ZVE32F-NEXT: .LBB74_6: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_13 +; RV64ZVE32F-NEXT: .LBB84_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_14 -; RV64ZVE32F-NEXT: .LBB74_7: # %else11 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 +; RV64ZVE32F-NEXT: .LBB84_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_9 -; RV64ZVE32F-NEXT: .LBB74_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_9 +; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8203,18 +9835,18 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 -; RV64ZVE32F-NEXT: .LBB74_9: # %else14 +; RV64ZVE32F-NEXT: .LBB84_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 ; 
RV64ZVE32F-NEXT: # %bb.10: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB74_16 -; RV64ZVE32F-NEXT: .LBB74_11: # %else20 +; RV64ZVE32F-NEXT: bnez a1, .LBB84_16 +; RV64ZVE32F-NEXT: .LBB84_11: # %else20 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB74_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB84_12: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8224,8 +9856,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_6 -; RV64ZVE32F-NEXT: .LBB74_13: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_6 +; RV64ZVE32F-NEXT: .LBB84_13: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8237,8 +9869,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 -; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_7 +; RV64ZVE32F-NEXT: .LBB84_14: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -8249,9 +9881,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_8 -; RV64ZVE32F-NEXT: j .LBB74_9 -; RV64ZVE32F-NEXT: .LBB74_15: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_8 +; RV64ZVE32F-NEXT: j .LBB84_9 +; RV64ZVE32F-NEXT: .LBB84_15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8261,8 +9893,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB74_11 -; RV64ZVE32F-NEXT: .LBB74_16: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a1, .LBB84_11 +; RV64ZVE32F-NEXT: .LBB84_16: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 @@ -8305,7 +9937,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB75_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB85_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -8313,9 +9945,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: .LBB75_2: # %else +; RV64ZVE32F-NEXT: .LBB85_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB75_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB85_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8327,23 +9959,23 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 -; RV64ZVE32F-NEXT: .LBB75_4: # %else2 +; RV64ZVE32F-NEXT: .LBB85_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB75_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB85_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB75_13 -; RV64ZVE32F-NEXT: .LBB75_6: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB85_13 +; RV64ZVE32F-NEXT: .LBB85_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB75_14 -; RV64ZVE32F-NEXT: .LBB75_7: # %else11 +; RV64ZVE32F-NEXT: bnez a2, .LBB85_14 +; RV64ZVE32F-NEXT: .LBB85_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB75_9 -; RV64ZVE32F-NEXT: .LBB75_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a2, .LBB85_9 +; RV64ZVE32F-NEXT: .LBB85_8: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8354,18 +9986,18 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 -; RV64ZVE32F-NEXT: .LBB75_9: # %else14 +; RV64ZVE32F-NEXT: .LBB85_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB75_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB85_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB75_16 -; RV64ZVE32F-NEXT: .LBB75_11: # %else20 +; RV64ZVE32F-NEXT: bnez a1, .LBB85_16 +; RV64ZVE32F-NEXT: .LBB85_11: # %else20 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB75_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB85_12: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8375,8 +10007,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB75_6 -; RV64ZVE32F-NEXT: .LBB75_13: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB85_6 +; RV64ZVE32F-NEXT: .LBB85_13: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8388,8 +10020,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB75_7 -; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB85_7 +; RV64ZVE32F-NEXT: .LBB85_14: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -8400,9 +10032,9 @@ define <8 x float> 
@mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB75_8 -; RV64ZVE32F-NEXT: j .LBB75_9 -; RV64ZVE32F-NEXT: .LBB75_15: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a2, .LBB85_8 +; RV64ZVE32F-NEXT: j .LBB85_9 +; RV64ZVE32F-NEXT: .LBB85_15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8412,8 +10044,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB75_11 -; RV64ZVE32F-NEXT: .LBB75_16: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a1, .LBB85_11 +; RV64ZVE32F-NEXT: .LBB85_16: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 @@ -8458,7 +10090,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB76_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB86_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -8467,9 +10099,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: .LBB76_2: # %else +; RV64ZVE32F-NEXT: .LBB86_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB76_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB86_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8482,23 +10114,23 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 -; RV64ZVE32F-NEXT: .LBB76_4: # %else2 +; RV64ZVE32F-NEXT: .LBB86_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB76_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB76_13 -; RV64ZVE32F-NEXT: .LBB76_6: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_13 +; RV64ZVE32F-NEXT: .LBB86_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB76_14 -; RV64ZVE32F-NEXT: .LBB76_7: # %else11 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_14 +; RV64ZVE32F-NEXT: .LBB86_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB76_9 -; RV64ZVE32F-NEXT: .LBB76_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a2, .LBB86_9 +; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8510,18 +10142,18 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; 
RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 -; RV64ZVE32F-NEXT: .LBB76_9: # %else14 +; RV64ZVE32F-NEXT: .LBB86_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB76_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB76_16 -; RV64ZVE32F-NEXT: .LBB76_11: # %else20 +; RV64ZVE32F-NEXT: bnez a1, .LBB86_16 +; RV64ZVE32F-NEXT: .LBB86_11: # %else20 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB76_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB86_12: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -8532,8 +10164,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB76_6 -; RV64ZVE32F-NEXT: .LBB76_13: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB86_6 +; RV64ZVE32F-NEXT: .LBB86_13: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8546,8 +10178,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB76_7 -; RV64ZVE32F-NEXT: .LBB76_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB86_7 +; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -8559,9 +10191,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB76_8 -; RV64ZVE32F-NEXT: j .LBB76_9 -; RV64ZVE32F-NEXT: .LBB76_15: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_8 +; RV64ZVE32F-NEXT: j .LBB86_9 +; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -8572,8 +10204,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB76_11 -; RV64ZVE32F-NEXT: .LBB76_16: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a1, .LBB86_11 +; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 @@ -8618,7 +10250,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB77_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB87_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8627,9 +10259,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> 
%idxs, <8 x ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: .LBB77_2: # %else +; RV64ZVE32F-NEXT: .LBB87_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB77_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB87_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8641,23 +10273,23 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 -; RV64ZVE32F-NEXT: .LBB77_4: # %else2 +; RV64ZVE32F-NEXT: .LBB87_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB77_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB87_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB77_13 -; RV64ZVE32F-NEXT: .LBB77_6: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB87_13 +; RV64ZVE32F-NEXT: .LBB87_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB77_14 -; RV64ZVE32F-NEXT: .LBB77_7: # %else11 +; RV64ZVE32F-NEXT: bnez a2, .LBB87_14 +; RV64ZVE32F-NEXT: .LBB87_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB77_9 -; RV64ZVE32F-NEXT: .LBB77_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a2, .LBB87_9 +; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8668,18 +10300,18 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 -; RV64ZVE32F-NEXT: .LBB77_9: # %else14 +; RV64ZVE32F-NEXT: .LBB87_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB77_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB87_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB77_16 -; RV64ZVE32F-NEXT: .LBB77_11: # %else20 +; RV64ZVE32F-NEXT: bnez a1, .LBB87_16 +; RV64ZVE32F-NEXT: .LBB87_11: # %else20 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB77_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB87_12: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8689,8 +10321,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB77_6 -; RV64ZVE32F-NEXT: .LBB77_13: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB87_6 +; RV64ZVE32F-NEXT: .LBB87_13: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8702,8 +10334,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; 
RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB77_7 -; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB87_7 +; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -8714,9 +10346,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB77_8 -; RV64ZVE32F-NEXT: j .LBB77_9 -; RV64ZVE32F-NEXT: .LBB77_15: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a2, .LBB87_8 +; RV64ZVE32F-NEXT: j .LBB87_9 +; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8726,8 +10358,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB77_11 -; RV64ZVE32F-NEXT: .LBB77_16: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a1, .LBB87_11 +; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 @@ -8770,7 +10402,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB78_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB88_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8779,9 +10411,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: .LBB78_2: # %else +; RV64ZVE32F-NEXT: .LBB88_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB78_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB88_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8793,23 +10425,23 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 -; RV64ZVE32F-NEXT: .LBB78_4: # %else2 +; RV64ZVE32F-NEXT: .LBB88_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB78_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB88_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB78_13 -; RV64ZVE32F-NEXT: .LBB78_6: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB88_13 +; RV64ZVE32F-NEXT: .LBB88_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB78_14 -; RV64ZVE32F-NEXT: .LBB78_7: # %else11 +; RV64ZVE32F-NEXT: bnez a2, .LBB88_14 +; RV64ZVE32F-NEXT: .LBB88_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz 
a2, .LBB78_9 -; RV64ZVE32F-NEXT: .LBB78_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a2, .LBB88_9 +; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8820,18 +10452,18 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 -; RV64ZVE32F-NEXT: .LBB78_9: # %else14 +; RV64ZVE32F-NEXT: .LBB88_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB78_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB88_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB78_16 -; RV64ZVE32F-NEXT: .LBB78_11: # %else20 +; RV64ZVE32F-NEXT: bnez a1, .LBB88_16 +; RV64ZVE32F-NEXT: .LBB88_11: # %else20 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB78_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB88_12: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8841,8 +10473,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB78_6 -; RV64ZVE32F-NEXT: .LBB78_13: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB88_6 +; RV64ZVE32F-NEXT: .LBB88_13: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8854,8 +10486,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB78_7 -; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB88_7 +; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -8866,9 +10498,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB78_8 -; RV64ZVE32F-NEXT: j .LBB78_9 -; RV64ZVE32F-NEXT: .LBB78_15: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a2, .LBB88_8 +; RV64ZVE32F-NEXT: j .LBB88_9 +; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8878,8 +10510,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB78_11 -; RV64ZVE32F-NEXT: .LBB78_16: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a1, .LBB88_11 +; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 @@ -8924,7 +10556,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: addiw a1, a1, -1 -; RV64ZVE32F-NEXT: beqz a3, .LBB79_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB89_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -8934,9 +10566,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: .LBB79_2: # %else +; RV64ZVE32F-NEXT: .LBB89_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB79_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB89_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8949,23 +10581,23 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 -; RV64ZVE32F-NEXT: .LBB79_4: # %else2 +; RV64ZVE32F-NEXT: .LBB89_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB79_12 +; RV64ZVE32F-NEXT: bnez a3, .LBB89_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB79_13 -; RV64ZVE32F-NEXT: .LBB79_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB89_13 +; RV64ZVE32F-NEXT: .LBB89_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB79_14 -; RV64ZVE32F-NEXT: .LBB79_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB89_14 +; RV64ZVE32F-NEXT: .LBB89_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB79_9 -; RV64ZVE32F-NEXT: .LBB79_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB89_9 +; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -8977,18 +10609,18 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 -; RV64ZVE32F-NEXT: .LBB79_9: # %else14 +; RV64ZVE32F-NEXT: .LBB89_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB79_15 +; RV64ZVE32F-NEXT: bnez a3, .LBB89_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: bnez a2, .LBB79_16 -; RV64ZVE32F-NEXT: .LBB79_11: # %else20 +; RV64ZVE32F-NEXT: bnez a2, .LBB89_16 +; RV64ZVE32F-NEXT: .LBB89_11: # %else20 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB79_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB89_12: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 @@ -8999,8 +10631,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB79_6 -; RV64ZVE32F-NEXT: .LBB79_13: # 
%cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB89_6 +; RV64ZVE32F-NEXT: .LBB89_13: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -9013,8 +10645,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB79_7 -; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB89_7 +; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -9026,9 +10658,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB79_8 -; RV64ZVE32F-NEXT: j .LBB79_9 -; RV64ZVE32F-NEXT: .LBB79_15: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a3, .LBB89_8 +; RV64ZVE32F-NEXT: j .LBB89_9 +; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 @@ -9039,8 +10671,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB79_11 -; RV64ZVE32F-NEXT: .LBB79_16: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a2, .LBB89_11 +; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -9084,7 +10716,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB80_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB90_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -9092,9 +10724,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: .LBB80_2: # %else +; RV64ZVE32F-NEXT: .LBB90_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB80_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB90_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 @@ -9104,23 +10736,23 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 -; RV64ZVE32F-NEXT: .LBB80_4: # %else2 +; RV64ZVE32F-NEXT: .LBB90_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB80_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB90_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB80_13 -; RV64ZVE32F-NEXT: 
.LBB80_6: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB90_13 +; RV64ZVE32F-NEXT: .LBB90_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB80_14 -; RV64ZVE32F-NEXT: .LBB80_7: # %else11 +; RV64ZVE32F-NEXT: bnez a2, .LBB90_14 +; RV64ZVE32F-NEXT: .LBB90_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB80_9 -; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a2, .LBB90_9 +; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -9130,18 +10762,18 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 -; RV64ZVE32F-NEXT: .LBB80_9: # %else14 +; RV64ZVE32F-NEXT: .LBB90_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB80_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB90_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB80_16 -; RV64ZVE32F-NEXT: .LBB80_11: # %else20 +; RV64ZVE32F-NEXT: bnez a1, .LBB90_16 +; RV64ZVE32F-NEXT: .LBB90_11: # %else20 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB80_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB90_12: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -9150,8 +10782,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB80_6 -; RV64ZVE32F-NEXT: .LBB80_13: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB90_6 +; RV64ZVE32F-NEXT: .LBB90_13: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -9161,8 +10793,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB80_7 -; RV64ZVE32F-NEXT: .LBB80_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB90_7 +; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -9171,9 +10803,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB80_8 -; RV64ZVE32F-NEXT: j .LBB80_9 -; RV64ZVE32F-NEXT: .LBB80_15: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a2, .LBB90_8 +; RV64ZVE32F-NEXT: j .LBB90_9 +; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -9182,8 +10814,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB80_11 -; RV64ZVE32F-NEXT: .LBB80_16: # %cond.load19 +; 
RV64ZVE32F-NEXT: beqz a1, .LBB90_11 +; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 @@ -9221,22 +10853,22 @@ define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %p ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vfirst.m a0, v0 -; RV32ZVE32F-NEXT: bnez a0, .LBB81_2 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_2 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a0) -; RV32ZVE32F-NEXT: .LBB81_2: # %else +; RV32ZVE32F-NEXT: .LBB91_2: # %else ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_v1f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vfirst.m a1, v0 -; RV64ZVE32F-NEXT: bnez a1, .LBB81_2 +; RV64ZVE32F-NEXT: bnez a1, .LBB91_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: fld fa0, 0(a0) -; RV64ZVE32F-NEXT: .LBB81_2: # %else +; RV64ZVE32F-NEXT: .LBB91_2: # %else ; RV64ZVE32F-NEXT: ret %v = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x double> %passthru) ret <1 x double> %v @@ -9264,19 +10896,19 @@ define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %p ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 ; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_3 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_3 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a0, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB82_4 -; RV32ZVE32F-NEXT: .LBB82_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_4 +; RV32ZVE32F-NEXT: .LBB92_2: # %else2 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB82_3: # %cond.load +; RV32ZVE32F-NEXT: .LBB92_3: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a0, a0, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB82_2 -; RV32ZVE32F-NEXT: .LBB82_4: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a0, .LBB92_2 +; RV32ZVE32F-NEXT: .LBB92_4: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -9288,17 +10920,17 @@ define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %p ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: bnez a3, .LBB82_3 +; RV64ZVE32F-NEXT: bnez a3, .LBB92_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB82_4 -; RV64ZVE32F-NEXT: .LBB82_2: # %else2 +; RV64ZVE32F-NEXT: bnez a2, .LBB92_4 +; RV64ZVE32F-NEXT: .LBB92_2: # %else2 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB82_3: # %cond.load +; RV64ZVE32F-NEXT: .LBB92_3: # %cond.load ; RV64ZVE32F-NEXT: fld fa0, 0(a0) ; RV64ZVE32F-NEXT: andi a2, a2, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB82_2 -; RV64ZVE32F-NEXT: .LBB82_4: # %cond.load1 +; RV64ZVE32F-NEXT: beqz a2, .LBB92_2 +; RV64ZVE32F-NEXT: .LBB92_4: # %cond.load1 ; RV64ZVE32F-NEXT: fld fa1, 0(a1) ; RV64ZVE32F-NEXT: ret %v = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x double> %passthru) @@ -9327,89 +10959,89 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p ; RV32ZVE32F-NEXT: 
vsetivli zero, 1, e8, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 ; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB83_6 +; RV32ZVE32F-NEXT: bnez a2, .LBB93_6 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB83_7 -; RV32ZVE32F-NEXT: .LBB83_2: # %else2 +; RV32ZVE32F-NEXT: bnez a2, .LBB93_7 +; RV32ZVE32F-NEXT: .LBB93_2: # %else2 ; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB83_8 -; RV32ZVE32F-NEXT: .LBB83_3: # %else5 +; RV32ZVE32F-NEXT: bnez a2, .LBB93_8 +; RV32ZVE32F-NEXT: .LBB93_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a1, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_5 -; RV32ZVE32F-NEXT: .LBB83_4: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_5 +; RV32ZVE32F-NEXT: .LBB93_4: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) -; RV32ZVE32F-NEXT: .LBB83_5: # %else8 +; RV32ZVE32F-NEXT: .LBB93_5: # %else8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) ; RV32ZVE32F-NEXT: fsd fa3, 24(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB83_6: # %cond.load +; RV32ZVE32F-NEXT: .LBB93_6: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB83_2 -; RV32ZVE32F-NEXT: .LBB83_7: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a2, .LBB93_2 +; RV32ZVE32F-NEXT: .LBB93_7: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9 ; RV32ZVE32F-NEXT: fld fa1, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB83_3 -; RV32ZVE32F-NEXT: .LBB83_8: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a2, .LBB93_3 +; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9 ; RV32ZVE32F-NEXT: fld fa2, 0(a2) ; RV32ZVE32F-NEXT: andi a1, a1, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_4 -; RV32ZVE32F-NEXT: j .LBB83_5 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_4 +; RV32ZVE32F-NEXT: j .LBB93_5 ; ; RV64ZVE32F-LABEL: mgather_v4f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: bnez a3, .LBB83_6 +; RV64ZVE32F-NEXT: bnez a3, .LBB93_6 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB83_7 -; RV64ZVE32F-NEXT: .LBB83_2: # %else2 +; RV64ZVE32F-NEXT: bnez a3, .LBB93_7 +; RV64ZVE32F-NEXT: .LBB93_2: # %else2 ; RV64ZVE32F-NEXT: andi a3, a2, 4 -; RV64ZVE32F-NEXT: bnez a3, .LBB83_8 -; RV64ZVE32F-NEXT: .LBB83_3: # %else5 +; RV64ZVE32F-NEXT: bnez a3, .LBB93_8 +; RV64ZVE32F-NEXT: .LBB93_3: # %else5 ; RV64ZVE32F-NEXT: andi a2, a2, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB83_5 -; RV64ZVE32F-NEXT: .LBB83_4: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB93_5 +; RV64ZVE32F-NEXT: .LBB93_4: # %cond.load7 ; RV64ZVE32F-NEXT: ld a1, 24(a1) ; RV64ZVE32F-NEXT: fld fa3, 0(a1) -; RV64ZVE32F-NEXT: .LBB83_5: # %else8 +; RV64ZVE32F-NEXT: .LBB93_5: # %else8 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) ; RV64ZVE32F-NEXT: fsd fa3, 24(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB83_6: # %cond.load +; RV64ZVE32F-NEXT: .LBB93_6: # %cond.load ; 
RV64ZVE32F-NEXT: ld a3, 0(a1) ; RV64ZVE32F-NEXT: fld fa0, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB83_2 -; RV64ZVE32F-NEXT: .LBB83_7: # %cond.load1 +; RV64ZVE32F-NEXT: beqz a3, .LBB93_2 +; RV64ZVE32F-NEXT: .LBB93_7: # %cond.load1 ; RV64ZVE32F-NEXT: ld a3, 8(a1) ; RV64ZVE32F-NEXT: fld fa1, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 4 -; RV64ZVE32F-NEXT: beqz a3, .LBB83_3 -; RV64ZVE32F-NEXT: .LBB83_8: # %cond.load4 +; RV64ZVE32F-NEXT: beqz a3, .LBB93_3 +; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load4 ; RV64ZVE32F-NEXT: ld a3, 16(a1) ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a2, a2, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB83_4 -; RV64ZVE32F-NEXT: j .LBB83_5 +; RV64ZVE32F-NEXT: bnez a2, .LBB93_4 +; RV64ZVE32F-NEXT: j .LBB93_5 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru) ret <4 x double> %v } @@ -9519,34 +11151,34 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 ; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB86_10 +; RV32ZVE32F-NEXT: bnez a2, .LBB96_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB86_11 -; RV32ZVE32F-NEXT: .LBB86_2: # %else2 +; RV32ZVE32F-NEXT: bnez a2, .LBB96_11 +; RV32ZVE32F-NEXT: .LBB96_2: # %else2 ; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB86_12 -; RV32ZVE32F-NEXT: .LBB86_3: # %else5 +; RV32ZVE32F-NEXT: bnez a2, .LBB96_12 +; RV32ZVE32F-NEXT: .LBB96_3: # %else5 ; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB86_13 -; RV32ZVE32F-NEXT: .LBB86_4: # %else8 +; RV32ZVE32F-NEXT: bnez a2, .LBB96_13 +; RV32ZVE32F-NEXT: .LBB96_4: # %else8 ; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB86_14 -; RV32ZVE32F-NEXT: .LBB86_5: # %else11 +; RV32ZVE32F-NEXT: bnez a2, .LBB96_14 +; RV32ZVE32F-NEXT: .LBB96_5: # %else11 ; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB86_15 -; RV32ZVE32F-NEXT: .LBB86_6: # %else14 +; RV32ZVE32F-NEXT: bnez a2, .LBB96_15 +; RV32ZVE32F-NEXT: .LBB96_6: # %else14 ; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB86_16 -; RV32ZVE32F-NEXT: .LBB86_7: # %else17 +; RV32ZVE32F-NEXT: bnez a2, .LBB96_16 +; RV32ZVE32F-NEXT: .LBB96_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a1, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_9 -; RV32ZVE32F-NEXT: .LBB86_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_9 +; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB86_9: # %else20 +; RV32ZVE32F-NEXT: .LBB96_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -9556,87 +11188,87 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB86_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB86_2 -; RV32ZVE32F-NEXT: .LBB86_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a2, .LBB96_2 +; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1 ; 
RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB86_3 -; RV32ZVE32F-NEXT: .LBB86_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a2, .LBB96_3 +; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB86_4 -; RV32ZVE32F-NEXT: .LBB86_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a2, .LBB96_4 +; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB86_5 -; RV32ZVE32F-NEXT: .LBB86_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a2, .LBB96_5 +; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB86_6 -; RV32ZVE32F-NEXT: .LBB86_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a2, .LBB96_6 +; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB86_7 -; RV32ZVE32F-NEXT: .LBB86_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a2, .LBB96_7 +; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a2) ; RV32ZVE32F-NEXT: andi a1, a1, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_8 -; RV32ZVE32F-NEXT: j .LBB86_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_8 +; RV32ZVE32F-NEXT: j .LBB96_9 ; ; RV64ZVE32F-LABEL: mgather_v8f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_10 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_10 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_11 -; RV64ZVE32F-NEXT: .LBB86_2: # %else2 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_11 +; RV64ZVE32F-NEXT: .LBB96_2: # %else2 ; RV64ZVE32F-NEXT: andi a3, a2, 4 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_12 -; RV64ZVE32F-NEXT: .LBB86_3: # %else5 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_12 +; RV64ZVE32F-NEXT: .LBB96_3: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_13 -; RV64ZVE32F-NEXT: .LBB86_4: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_13 +; RV64ZVE32F-NEXT: .LBB96_4: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_14 -; RV64ZVE32F-NEXT: .LBB86_5: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_14 +; RV64ZVE32F-NEXT: .LBB96_5: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_15 -; RV64ZVE32F-NEXT: .LBB86_6: # %else14 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_15 +; RV64ZVE32F-NEXT: .LBB96_6: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_16 -; RV64ZVE32F-NEXT: .LBB86_7: # %else17 +; 
RV64ZVE32F-NEXT: bnez a3, .LBB96_16 +; RV64ZVE32F-NEXT: .LBB96_7: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB86_9 -; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a2, .LBB96_9 +; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19 ; RV64ZVE32F-NEXT: ld a1, 56(a1) ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB86_9: # %else20 +; RV64ZVE32F-NEXT: .LBB96_9: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -9646,42 +11278,42 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB86_10: # %cond.load +; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load ; RV64ZVE32F-NEXT: ld a3, 0(a1) ; RV64ZVE32F-NEXT: fld fa0, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_2 -; RV64ZVE32F-NEXT: .LBB86_11: # %cond.load1 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_2 +; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1 ; RV64ZVE32F-NEXT: ld a3, 8(a1) ; RV64ZVE32F-NEXT: fld fa1, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 4 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_3 -; RV64ZVE32F-NEXT: .LBB86_12: # %cond.load4 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_3 +; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4 ; RV64ZVE32F-NEXT: ld a3, 16(a1) ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_4 -; RV64ZVE32F-NEXT: .LBB86_13: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_4 +; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7 ; RV64ZVE32F-NEXT: ld a3, 24(a1) ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_5 -; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_5 +; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10 ; RV64ZVE32F-NEXT: ld a3, 32(a1) ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_6 -; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_6 +; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13 ; RV64ZVE32F-NEXT: ld a3, 40(a1) ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 64 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_7 -; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load16 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_7 +; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16 ; RV64ZVE32F-NEXT: ld a3, 48(a1) ; RV64ZVE32F-NEXT: fld fa6, 0(a3) ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: bnez a2, .LBB86_8 -; RV64ZVE32F-NEXT: j .LBB86_9 +; RV64ZVE32F-NEXT: bnez a2, .LBB96_8 +; RV64ZVE32F-NEXT: j .LBB96_9 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) ret <8 x double> %v } @@ -9716,34 +11348,34 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB87_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB97_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_11 -; RV32ZVE32F-NEXT: .LBB87_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB97_11 +; RV32ZVE32F-NEXT: .LBB97_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_12 -; RV32ZVE32F-NEXT: .LBB87_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB97_12 +; RV32ZVE32F-NEXT: .LBB97_3: # %else5 ; RV32ZVE32F-NEXT: andi 
a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_13 -; RV32ZVE32F-NEXT: .LBB87_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB97_13 +; RV32ZVE32F-NEXT: .LBB97_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_14 -; RV32ZVE32F-NEXT: .LBB87_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB97_14 +; RV32ZVE32F-NEXT: .LBB97_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_15 -; RV32ZVE32F-NEXT: .LBB87_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB97_15 +; RV32ZVE32F-NEXT: .LBB97_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_16 -; RV32ZVE32F-NEXT: .LBB87_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB97_16 +; RV32ZVE32F-NEXT: .LBB97_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_9 -; RV32ZVE32F-NEXT: .LBB87_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB97_9 +; RV32ZVE32F-NEXT: .LBB97_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB87_9: # %else20 +; RV32ZVE32F-NEXT: .LBB97_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -9753,69 +11385,69 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB87_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB97_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 -; RV32ZVE32F-NEXT: .LBB87_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB97_2 +; RV32ZVE32F-NEXT: .LBB97_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_3 -; RV32ZVE32F-NEXT: .LBB87_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB97_3 +; RV32ZVE32F-NEXT: .LBB97_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_4 -; RV32ZVE32F-NEXT: .LBB87_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB97_4 +; RV32ZVE32F-NEXT: .LBB97_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_5 -; RV32ZVE32F-NEXT: .LBB87_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB97_5 +; RV32ZVE32F-NEXT: .LBB97_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_6 -; RV32ZVE32F-NEXT: .LBB87_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB97_6 +; RV32ZVE32F-NEXT: .LBB97_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, 
.LBB87_7 -; RV32ZVE32F-NEXT: .LBB87_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB97_7 +; RV32ZVE32F-NEXT: .LBB97_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_8 -; RV32ZVE32F-NEXT: j .LBB87_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB97_8 +; RV32ZVE32F-NEXT: j .LBB97_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: beqz a3, .LBB87_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB97_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB87_2: # %else +; RV64ZVE32F-NEXT: .LBB97_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB87_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB97_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -9823,47 +11455,47 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB87_4: # %else2 +; RV64ZVE32F-NEXT: .LBB97_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB87_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB97_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB87_15 -; RV64ZVE32F-NEXT: .LBB87_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB97_15 +; RV64ZVE32F-NEXT: .LBB97_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB87_16 -; RV64ZVE32F-NEXT: .LBB87_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB97_16 +; RV64ZVE32F-NEXT: .LBB97_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB87_9 -; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB97_9 +; RV64ZVE32F-NEXT: .LBB97_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB87_9: # %else14 +; RV64ZVE32F-NEXT: .LBB97_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB87_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB97_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa6, 0(a3) -; RV64ZVE32F-NEXT: .LBB87_11: # %else17 +; RV64ZVE32F-NEXT: .LBB97_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB87_13 +; RV64ZVE32F-NEXT: beqz a2, .LBB97_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB87_13: # %else20 +; RV64ZVE32F-NEXT: .LBB97_13: # 
%else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -9873,29 +11505,29 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB87_6 -; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB97_6 +; RV64ZVE32F-NEXT: .LBB97_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB87_7 -; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB97_7 +; RV64ZVE32F-NEXT: .LBB97_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB87_8 -; RV64ZVE32F-NEXT: j .LBB87_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB97_8 +; RV64ZVE32F-NEXT: j .LBB97_9 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) ret <8 x double> %v @@ -9931,34 +11563,34 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB88_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB98_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_11 -; RV32ZVE32F-NEXT: .LBB88_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB98_11 +; RV32ZVE32F-NEXT: .LBB98_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_12 -; RV32ZVE32F-NEXT: .LBB88_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB98_12 +; RV32ZVE32F-NEXT: .LBB98_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_13 -; RV32ZVE32F-NEXT: .LBB88_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB98_13 +; RV32ZVE32F-NEXT: .LBB98_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_14 -; RV32ZVE32F-NEXT: .LBB88_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB98_14 +; RV32ZVE32F-NEXT: .LBB98_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_15 -; RV32ZVE32F-NEXT: .LBB88_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB98_15 +; RV32ZVE32F-NEXT: .LBB98_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_16 -; RV32ZVE32F-NEXT: .LBB88_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB98_16 +; RV32ZVE32F-NEXT: .LBB98_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_9 -; RV32ZVE32F-NEXT: .LBB88_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB98_9 +; RV32ZVE32F-NEXT: .LBB98_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB88_9: # %else20 +; RV32ZVE32F-NEXT: 
.LBB98_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -9968,69 +11600,69 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB88_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB98_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_2 -; RV32ZVE32F-NEXT: .LBB88_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB98_2 +; RV32ZVE32F-NEXT: .LBB98_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_3 -; RV32ZVE32F-NEXT: .LBB88_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB98_3 +; RV32ZVE32F-NEXT: .LBB98_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_4 -; RV32ZVE32F-NEXT: .LBB88_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB98_4 +; RV32ZVE32F-NEXT: .LBB98_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_5 -; RV32ZVE32F-NEXT: .LBB88_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB98_5 +; RV32ZVE32F-NEXT: .LBB98_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_6 -; RV32ZVE32F-NEXT: .LBB88_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB98_6 +; RV32ZVE32F-NEXT: .LBB98_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_7 -; RV32ZVE32F-NEXT: .LBB88_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB98_7 +; RV32ZVE32F-NEXT: .LBB98_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_8 -; RV32ZVE32F-NEXT: j .LBB88_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB98_8 +; RV32ZVE32F-NEXT: j .LBB98_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: beqz a3, .LBB88_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB98_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB88_2: # %else +; RV64ZVE32F-NEXT: .LBB98_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB88_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB98_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: 
vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -10038,47 +11670,47 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB88_4: # %else2 +; RV64ZVE32F-NEXT: .LBB98_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB88_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB98_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB88_15 -; RV64ZVE32F-NEXT: .LBB88_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB98_15 +; RV64ZVE32F-NEXT: .LBB98_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB88_16 -; RV64ZVE32F-NEXT: .LBB88_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB98_16 +; RV64ZVE32F-NEXT: .LBB98_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB88_9 -; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB98_9 +; RV64ZVE32F-NEXT: .LBB98_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB88_9: # %else14 +; RV64ZVE32F-NEXT: .LBB98_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB88_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB98_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa6, 0(a3) -; RV64ZVE32F-NEXT: .LBB88_11: # %else17 +; RV64ZVE32F-NEXT: .LBB98_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB88_13 +; RV64ZVE32F-NEXT: beqz a2, .LBB98_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB88_13: # %else20 +; RV64ZVE32F-NEXT: .LBB98_13: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10088,29 +11720,29 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB88_6 -; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB98_6 +; RV64ZVE32F-NEXT: .LBB98_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB88_7 -; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB98_7 +; RV64ZVE32F-NEXT: .LBB98_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli 
a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB88_8 -; RV64ZVE32F-NEXT: j .LBB88_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB98_8 +; RV64ZVE32F-NEXT: j .LBB98_9 %eidxs = sext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) @@ -10148,34 +11780,34 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB89_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB99_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_11 -; RV32ZVE32F-NEXT: .LBB89_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB99_11 +; RV32ZVE32F-NEXT: .LBB99_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_12 -; RV32ZVE32F-NEXT: .LBB89_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB99_12 +; RV32ZVE32F-NEXT: .LBB99_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_13 -; RV32ZVE32F-NEXT: .LBB89_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB99_13 +; RV32ZVE32F-NEXT: .LBB99_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_14 -; RV32ZVE32F-NEXT: .LBB89_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB99_14 +; RV32ZVE32F-NEXT: .LBB99_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_15 -; RV32ZVE32F-NEXT: .LBB89_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB99_15 +; RV32ZVE32F-NEXT: .LBB99_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_16 -; RV32ZVE32F-NEXT: .LBB89_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB99_16 +; RV32ZVE32F-NEXT: .LBB99_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_9 -; RV32ZVE32F-NEXT: .LBB89_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB99_9 +; RV32ZVE32F-NEXT: .LBB99_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB89_9: # %else20 +; RV32ZVE32F-NEXT: .LBB99_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10185,70 +11817,70 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB89_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB99_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_2 -; RV32ZVE32F-NEXT: .LBB89_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB99_2 +; RV32ZVE32F-NEXT: .LBB99_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_3 -; RV32ZVE32F-NEXT: .LBB89_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB99_3 +; RV32ZVE32F-NEXT: .LBB99_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; 
RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_4 -; RV32ZVE32F-NEXT: .LBB89_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB99_4 +; RV32ZVE32F-NEXT: .LBB99_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_5 -; RV32ZVE32F-NEXT: .LBB89_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB99_5 +; RV32ZVE32F-NEXT: .LBB99_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_6 -; RV32ZVE32F-NEXT: .LBB89_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB99_6 +; RV32ZVE32F-NEXT: .LBB99_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_7 -; RV32ZVE32F-NEXT: .LBB89_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB99_7 +; RV32ZVE32F-NEXT: .LBB99_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_8 -; RV32ZVE32F-NEXT: j .LBB89_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB99_8 +; RV32ZVE32F-NEXT: j .LBB99_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB99_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: andi a3, a3, 255 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB89_2: # %else +; RV64ZVE32F-NEXT: .LBB99_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB99_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -10257,42 +11889,42 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB89_4: # %else2 +; RV64ZVE32F-NEXT: .LBB99_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB99_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_15 -; RV64ZVE32F-NEXT: .LBB89_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB99_15 +; RV64ZVE32F-NEXT: .LBB99_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_16 -; RV64ZVE32F-NEXT: .LBB89_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB99_16 +; RV64ZVE32F-NEXT: .LBB99_7: # %else11 ; RV64ZVE32F-NEXT: 
andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_9 -; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB99_9 +; RV64ZVE32F-NEXT: .LBB99_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: andi a3, a3, 255 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB89_9: # %else14 +; RV64ZVE32F-NEXT: .LBB99_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB99_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: andi a3, a3, 255 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa6, 0(a3) -; RV64ZVE32F-NEXT: .LBB89_11: # %else17 +; RV64ZVE32F-NEXT: .LBB99_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB89_13 +; RV64ZVE32F-NEXT: beqz a2, .LBB99_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10300,7 +11932,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB89_13: # %else20 +; RV64ZVE32F-NEXT: .LBB99_13: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10310,15 +11942,15 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB99_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: andi a3, a3, 255 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_6 -; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB99_6 +; RV64ZVE32F-NEXT: .LBB99_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: andi a3, a3, 255 @@ -10326,16 +11958,16 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_7 -; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB99_7 +; RV64ZVE32F-NEXT: .LBB99_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: andi a3, a3, 255 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_8 -; RV64ZVE32F-NEXT: j .LBB89_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB99_8 +; RV64ZVE32F-NEXT: j .LBB99_9 %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) @@ -10372,34 +12004,34 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB90_10 +; 
RV32ZVE32F-NEXT: bnez a3, .LBB100_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 -; RV32ZVE32F-NEXT: .LBB90_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB100_11 +; RV32ZVE32F-NEXT: .LBB100_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_12 -; RV32ZVE32F-NEXT: .LBB90_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB100_12 +; RV32ZVE32F-NEXT: .LBB100_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 -; RV32ZVE32F-NEXT: .LBB90_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB100_13 +; RV32ZVE32F-NEXT: .LBB100_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 -; RV32ZVE32F-NEXT: .LBB90_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB100_14 +; RV32ZVE32F-NEXT: .LBB100_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_15 -; RV32ZVE32F-NEXT: .LBB90_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB100_15 +; RV32ZVE32F-NEXT: .LBB100_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_16 -; RV32ZVE32F-NEXT: .LBB90_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB100_16 +; RV32ZVE32F-NEXT: .LBB100_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_9 -; RV32ZVE32F-NEXT: .LBB90_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB100_9 +; RV32ZVE32F-NEXT: .LBB100_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB90_9: # %else20 +; RV32ZVE32F-NEXT: .LBB100_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10409,70 +12041,70 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB90_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB100_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 -; RV32ZVE32F-NEXT: .LBB90_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB100_2 +; RV32ZVE32F-NEXT: .LBB100_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_3 -; RV32ZVE32F-NEXT: .LBB90_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB100_3 +; RV32ZVE32F-NEXT: .LBB100_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_4 -; RV32ZVE32F-NEXT: .LBB90_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB100_4 +; RV32ZVE32F-NEXT: .LBB100_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_5 -; RV32ZVE32F-NEXT: .LBB90_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB100_5 +; RV32ZVE32F-NEXT: .LBB100_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; 
RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_6 -; RV32ZVE32F-NEXT: .LBB90_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB100_6 +; RV32ZVE32F-NEXT: .LBB100_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_7 -; RV32ZVE32F-NEXT: .LBB90_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB100_7 +; RV32ZVE32F-NEXT: .LBB100_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_8 -; RV32ZVE32F-NEXT: j .LBB90_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB100_8 +; RV32ZVE32F-NEXT: j .LBB100_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: beqz a3, .LBB90_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB100_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB90_2: # %else +; RV64ZVE32F-NEXT: .LBB100_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB90_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB100_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -10480,47 +12112,47 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB90_4: # %else2 +; RV64ZVE32F-NEXT: .LBB100_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB90_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB100_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB90_15 -; RV64ZVE32F-NEXT: .LBB90_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB100_15 +; RV64ZVE32F-NEXT: .LBB100_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB90_16 -; RV64ZVE32F-NEXT: .LBB90_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB100_16 +; RV64ZVE32F-NEXT: .LBB100_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB90_9 -; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB100_9 +; RV64ZVE32F-NEXT: .LBB100_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB90_9: # %else14 +; RV64ZVE32F-NEXT: .LBB100_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB90_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB100_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli 
a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa6, 0(a3) -; RV64ZVE32F-NEXT: .LBB90_11: # %else17 +; RV64ZVE32F-NEXT: .LBB100_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB90_13 +; RV64ZVE32F-NEXT: beqz a2, .LBB100_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB90_13: # %else20 +; RV64ZVE32F-NEXT: .LBB100_13: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10530,29 +12162,29 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB100_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB90_6 -; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB100_6 +; RV64ZVE32F-NEXT: .LBB100_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB90_7 -; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB100_7 +; RV64ZVE32F-NEXT: .LBB100_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB90_8 -; RV64ZVE32F-NEXT: j .LBB90_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB100_8 +; RV64ZVE32F-NEXT: j .LBB100_9 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) ret <8 x double> %v @@ -10588,34 +12220,34 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB91_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB101_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB91_11 -; RV32ZVE32F-NEXT: .LBB91_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB101_11 +; RV32ZVE32F-NEXT: .LBB101_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB91_12 -; RV32ZVE32F-NEXT: .LBB91_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB101_12 +; RV32ZVE32F-NEXT: .LBB101_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB91_13 -; RV32ZVE32F-NEXT: .LBB91_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB101_13 +; RV32ZVE32F-NEXT: .LBB101_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB91_14 -; RV32ZVE32F-NEXT: .LBB91_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB101_14 +; RV32ZVE32F-NEXT: .LBB101_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB91_15 -; RV32ZVE32F-NEXT: .LBB91_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB101_15 +; RV32ZVE32F-NEXT: .LBB101_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, 
a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB91_16 -; RV32ZVE32F-NEXT: .LBB91_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB101_16 +; RV32ZVE32F-NEXT: .LBB101_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB91_9 -; RV32ZVE32F-NEXT: .LBB91_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB101_9 +; RV32ZVE32F-NEXT: .LBB101_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB91_9: # %else20 +; RV32ZVE32F-NEXT: .LBB101_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10625,70 +12257,70 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB91_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB101_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB91_2 -; RV32ZVE32F-NEXT: .LBB91_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB101_2 +; RV32ZVE32F-NEXT: .LBB101_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB91_3 -; RV32ZVE32F-NEXT: .LBB91_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB101_3 +; RV32ZVE32F-NEXT: .LBB101_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB91_4 -; RV32ZVE32F-NEXT: .LBB91_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB101_4 +; RV32ZVE32F-NEXT: .LBB101_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB91_5 -; RV32ZVE32F-NEXT: .LBB91_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB101_5 +; RV32ZVE32F-NEXT: .LBB101_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB91_6 -; RV32ZVE32F-NEXT: .LBB91_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB101_6 +; RV32ZVE32F-NEXT: .LBB101_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB91_7 -; RV32ZVE32F-NEXT: .LBB91_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB101_7 +; RV32ZVE32F-NEXT: .LBB101_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB91_8 -; RV32ZVE32F-NEXT: j .LBB91_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB101_8 +; RV32ZVE32F-NEXT: j .LBB101_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64: ; RV64ZVE32F: # %bb.0: ; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: beqz a3, .LBB91_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB101_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB91_2: # %else +; RV64ZVE32F-NEXT: .LBB101_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB91_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB101_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -10696,47 +12328,47 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB91_4: # %else2 +; RV64ZVE32F-NEXT: .LBB101_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB91_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB101_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB91_15 -; RV64ZVE32F-NEXT: .LBB91_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB101_15 +; RV64ZVE32F-NEXT: .LBB101_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB91_16 -; RV64ZVE32F-NEXT: .LBB91_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB101_16 +; RV64ZVE32F-NEXT: .LBB101_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB91_9 -; RV64ZVE32F-NEXT: .LBB91_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB101_9 +; RV64ZVE32F-NEXT: .LBB101_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB91_9: # %else14 +; RV64ZVE32F-NEXT: .LBB101_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB91_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB101_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa6, 0(a3) -; RV64ZVE32F-NEXT: .LBB91_11: # %else17 +; RV64ZVE32F-NEXT: .LBB101_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_13 +; RV64ZVE32F-NEXT: beqz a2, .LBB101_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB91_13: # %else20 +; RV64ZVE32F-NEXT: .LBB101_13: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10746,29 +12378,29 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB91_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB101_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; 
RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB91_6 -; RV64ZVE32F-NEXT: .LBB91_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB101_6 +; RV64ZVE32F-NEXT: .LBB101_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB91_7 -; RV64ZVE32F-NEXT: .LBB91_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB101_7 +; RV64ZVE32F-NEXT: .LBB101_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB91_8 -; RV64ZVE32F-NEXT: j .LBB91_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB101_8 +; RV64ZVE32F-NEXT: j .LBB101_9 %eidxs = sext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) @@ -10806,34 +12438,34 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB92_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB102_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB92_11 -; RV32ZVE32F-NEXT: .LBB92_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB102_11 +; RV32ZVE32F-NEXT: .LBB102_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB92_12 -; RV32ZVE32F-NEXT: .LBB92_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB102_12 +; RV32ZVE32F-NEXT: .LBB102_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB92_13 -; RV32ZVE32F-NEXT: .LBB92_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB102_13 +; RV32ZVE32F-NEXT: .LBB102_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB92_14 -; RV32ZVE32F-NEXT: .LBB92_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB102_14 +; RV32ZVE32F-NEXT: .LBB102_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB92_15 -; RV32ZVE32F-NEXT: .LBB92_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB102_15 +; RV32ZVE32F-NEXT: .LBB102_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB92_16 -; RV32ZVE32F-NEXT: .LBB92_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB102_16 +; RV32ZVE32F-NEXT: .LBB102_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB92_9 -; RV32ZVE32F-NEXT: .LBB92_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB102_9 +; RV32ZVE32F-NEXT: .LBB102_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB92_9: # %else20 +; RV32ZVE32F-NEXT: .LBB102_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10843,54 +12475,54 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB92_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB102_10: # %cond.load ; 
RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB92_2 -; RV32ZVE32F-NEXT: .LBB92_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB102_2 +; RV32ZVE32F-NEXT: .LBB102_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB92_3 -; RV32ZVE32F-NEXT: .LBB92_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB102_3 +; RV32ZVE32F-NEXT: .LBB102_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB92_4 -; RV32ZVE32F-NEXT: .LBB92_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB102_4 +; RV32ZVE32F-NEXT: .LBB102_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB92_5 -; RV32ZVE32F-NEXT: .LBB92_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB102_5 +; RV32ZVE32F-NEXT: .LBB102_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB92_6 -; RV32ZVE32F-NEXT: .LBB92_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB102_6 +; RV32ZVE32F-NEXT: .LBB102_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB92_7 -; RV32ZVE32F-NEXT: .LBB92_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB102_7 +; RV32ZVE32F-NEXT: .LBB102_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB92_8 -; RV32ZVE32F-NEXT: j .LBB92_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB102_8 +; RV32ZVE32F-NEXT: j .LBB102_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV64ZVE32F: # %bb.0: @@ -10899,7 +12531,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi a4, a3, 1 ; RV64ZVE32F-NEXT: addiw a2, a2, -1 -; RV64ZVE32F-NEXT: beqz a4, .LBB92_2 +; RV64ZVE32F-NEXT: beqz a4, .LBB102_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a4, v8 @@ -10907,9 +12539,9 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa0, 0(a4) -; RV64ZVE32F-NEXT: .LBB92_2: # %else +; RV64ZVE32F-NEXT: .LBB102_2: # %else ; RV64ZVE32F-NEXT: andi a4, a3, 2 -; RV64ZVE32F-NEXT: beqz a4, .LBB92_4 +; RV64ZVE32F-NEXT: beqz a4, .LBB102_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -10918,42 +12550,42 @@ define <8 x double> 
@mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa1, 0(a4) -; RV64ZVE32F-NEXT: .LBB92_4: # %else2 +; RV64ZVE32F-NEXT: .LBB102_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a4, a3, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a4, .LBB92_14 +; RV64ZVE32F-NEXT: bnez a4, .LBB102_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a4, a3, 8 -; RV64ZVE32F-NEXT: bnez a4, .LBB92_15 -; RV64ZVE32F-NEXT: .LBB92_6: # %else8 +; RV64ZVE32F-NEXT: bnez a4, .LBB102_15 +; RV64ZVE32F-NEXT: .LBB102_6: # %else8 ; RV64ZVE32F-NEXT: andi a4, a3, 16 -; RV64ZVE32F-NEXT: bnez a4, .LBB92_16 -; RV64ZVE32F-NEXT: .LBB92_7: # %else11 +; RV64ZVE32F-NEXT: bnez a4, .LBB102_16 +; RV64ZVE32F-NEXT: .LBB102_7: # %else11 ; RV64ZVE32F-NEXT: andi a4, a3, 32 -; RV64ZVE32F-NEXT: beqz a4, .LBB92_9 -; RV64ZVE32F-NEXT: .LBB92_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a4, .LBB102_9 +; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8 ; RV64ZVE32F-NEXT: and a4, a4, a2 ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa5, 0(a4) -; RV64ZVE32F-NEXT: .LBB92_9: # %else14 +; RV64ZVE32F-NEXT: .LBB102_9: # %else14 ; RV64ZVE32F-NEXT: andi a4, a3, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a4, .LBB92_11 +; RV64ZVE32F-NEXT: beqz a4, .LBB102_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8 ; RV64ZVE32F-NEXT: and a4, a4, a2 ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa6, 0(a4) -; RV64ZVE32F-NEXT: .LBB92_11: # %else17 +; RV64ZVE32F-NEXT: .LBB102_11: # %else17 ; RV64ZVE32F-NEXT: andi a3, a3, -128 -; RV64ZVE32F-NEXT: beqz a3, .LBB92_13 +; RV64ZVE32F-NEXT: beqz a3, .LBB102_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -10961,7 +12593,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB92_13: # %else20 +; RV64ZVE32F-NEXT: .LBB102_13: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -10971,15 +12603,15 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB92_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB102_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8 ; RV64ZVE32F-NEXT: and a4, a4, a2 ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa2, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 8 -; RV64ZVE32F-NEXT: beqz a4, .LBB92_6 -; RV64ZVE32F-NEXT: .LBB92_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a4, .LBB102_6 +; RV64ZVE32F-NEXT: .LBB102_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8 ; RV64ZVE32F-NEXT: and a4, a4, a2 @@ -10987,16 +12619,16 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa3, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 16 -; 
RV64ZVE32F-NEXT: beqz a4, .LBB92_7 -; RV64ZVE32F-NEXT: .LBB92_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a4, .LBB102_7 +; RV64ZVE32F-NEXT: .LBB102_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9 ; RV64ZVE32F-NEXT: and a4, a4, a2 ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa4, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 32 -; RV64ZVE32F-NEXT: bnez a4, .LBB92_8 -; RV64ZVE32F-NEXT: j .LBB92_9 +; RV64ZVE32F-NEXT: bnez a4, .LBB102_8 +; RV64ZVE32F-NEXT: j .LBB102_9 %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) @@ -11031,34 +12663,34 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB93_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB103_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB93_11 -; RV32ZVE32F-NEXT: .LBB93_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB103_11 +; RV32ZVE32F-NEXT: .LBB103_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB93_12 -; RV32ZVE32F-NEXT: .LBB93_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB103_12 +; RV32ZVE32F-NEXT: .LBB103_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB93_13 -; RV32ZVE32F-NEXT: .LBB93_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB103_13 +; RV32ZVE32F-NEXT: .LBB103_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB93_14 -; RV32ZVE32F-NEXT: .LBB93_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB103_14 +; RV32ZVE32F-NEXT: .LBB103_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB93_15 -; RV32ZVE32F-NEXT: .LBB93_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB103_15 +; RV32ZVE32F-NEXT: .LBB103_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB93_16 -; RV32ZVE32F-NEXT: .LBB93_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB103_16 +; RV32ZVE32F-NEXT: .LBB103_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB93_9 -; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB103_9 +; RV32ZVE32F-NEXT: .LBB103_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB93_9: # %else20 +; RV32ZVE32F-NEXT: .LBB103_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -11068,70 +12700,70 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB93_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB103_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB93_2 -; RV32ZVE32F-NEXT: .LBB93_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB103_2 +; RV32ZVE32F-NEXT: .LBB103_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 
4 -; RV32ZVE32F-NEXT: beqz a1, .LBB93_3 -; RV32ZVE32F-NEXT: .LBB93_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB103_3 +; RV32ZVE32F-NEXT: .LBB103_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB93_4 -; RV32ZVE32F-NEXT: .LBB93_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB103_4 +; RV32ZVE32F-NEXT: .LBB103_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB93_5 -; RV32ZVE32F-NEXT: .LBB93_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB103_5 +; RV32ZVE32F-NEXT: .LBB103_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB93_6 -; RV32ZVE32F-NEXT: .LBB93_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB103_6 +; RV32ZVE32F-NEXT: .LBB103_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB93_7 -; RV32ZVE32F-NEXT: .LBB93_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB103_7 +; RV32ZVE32F-NEXT: .LBB103_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB93_8 -; RV32ZVE32F-NEXT: j .LBB93_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB103_8 +; RV32ZVE32F-NEXT: j .LBB103_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: beqz a3, .LBB93_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB103_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB93_2: # %else +; RV64ZVE32F-NEXT: .LBB103_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB93_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB103_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 @@ -11139,47 +12771,47 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB93_4: # %else2 +; RV64ZVE32F-NEXT: .LBB103_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB93_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB103_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB93_15 -; RV64ZVE32F-NEXT: 
.LBB93_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB103_15 +; RV64ZVE32F-NEXT: .LBB103_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB93_16 -; RV64ZVE32F-NEXT: .LBB93_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB103_16 +; RV64ZVE32F-NEXT: .LBB103_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB93_9 -; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB103_9 +; RV64ZVE32F-NEXT: .LBB103_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB93_9: # %else14 +; RV64ZVE32F-NEXT: .LBB103_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB93_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB103_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa6, 0(a3) -; RV64ZVE32F-NEXT: .LBB93_11: # %else17 +; RV64ZVE32F-NEXT: .LBB103_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB93_13 +; RV64ZVE32F-NEXT: beqz a2, .LBB103_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB93_13: # %else20 +; RV64ZVE32F-NEXT: .LBB103_13: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -11189,29 +12821,29 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB93_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB103_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB93_6 -; RV64ZVE32F-NEXT: .LBB93_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB103_6 +; RV64ZVE32F-NEXT: .LBB103_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB93_7 -; RV64ZVE32F-NEXT: .LBB93_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB103_7 +; RV64ZVE32F-NEXT: .LBB103_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB93_8 -; RV64ZVE32F-NEXT: j .LBB93_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB103_8 +; RV64ZVE32F-NEXT: j .LBB103_9 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) ret <8 x double> %v @@ -11245,34 +12877,34 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB94_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB104_10 ; 
RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB94_11 -; RV32ZVE32F-NEXT: .LBB94_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB104_11 +; RV32ZVE32F-NEXT: .LBB104_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB94_12 -; RV32ZVE32F-NEXT: .LBB94_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB104_12 +; RV32ZVE32F-NEXT: .LBB104_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB94_13 -; RV32ZVE32F-NEXT: .LBB94_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB104_13 +; RV32ZVE32F-NEXT: .LBB104_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB94_14 -; RV32ZVE32F-NEXT: .LBB94_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB104_14 +; RV32ZVE32F-NEXT: .LBB104_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB94_15 -; RV32ZVE32F-NEXT: .LBB94_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB104_15 +; RV32ZVE32F-NEXT: .LBB104_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB94_16 -; RV32ZVE32F-NEXT: .LBB94_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB104_16 +; RV32ZVE32F-NEXT: .LBB104_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB94_9 -; RV32ZVE32F-NEXT: .LBB94_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB104_9 +; RV32ZVE32F-NEXT: .LBB104_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB94_9: # %else20 +; RV32ZVE32F-NEXT: .LBB104_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -11282,70 +12914,70 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB94_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB104_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB94_2 -; RV32ZVE32F-NEXT: .LBB94_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB104_2 +; RV32ZVE32F-NEXT: .LBB104_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB94_3 -; RV32ZVE32F-NEXT: .LBB94_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB104_3 +; RV32ZVE32F-NEXT: .LBB104_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB94_4 -; RV32ZVE32F-NEXT: .LBB94_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB104_4 +; RV32ZVE32F-NEXT: .LBB104_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB94_5 -; RV32ZVE32F-NEXT: .LBB94_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB104_5 +; RV32ZVE32F-NEXT: .LBB104_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; 
RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB94_6 -; RV32ZVE32F-NEXT: .LBB94_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB104_6 +; RV32ZVE32F-NEXT: .LBB104_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB94_7 -; RV32ZVE32F-NEXT: .LBB94_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB104_7 +; RV32ZVE32F-NEXT: .LBB104_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB94_8 -; RV32ZVE32F-NEXT: j .LBB94_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB104_8 +; RV32ZVE32F-NEXT: j .LBB104_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: beqz a3, .LBB94_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB104_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB94_2: # %else +; RV64ZVE32F-NEXT: .LBB104_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB94_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB104_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 @@ -11353,47 +12985,47 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB94_4: # %else2 +; RV64ZVE32F-NEXT: .LBB104_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB94_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB104_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB94_15 -; RV64ZVE32F-NEXT: .LBB94_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB104_15 +; RV64ZVE32F-NEXT: .LBB104_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB94_16 -; RV64ZVE32F-NEXT: .LBB94_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB104_16 +; RV64ZVE32F-NEXT: .LBB104_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB94_9 -; RV64ZVE32F-NEXT: .LBB94_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB104_9 +; RV64ZVE32F-NEXT: .LBB104_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB94_9: # %else14 +; RV64ZVE32F-NEXT: .LBB104_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB94_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB104_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; 
RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa6, 0(a3) -; RV64ZVE32F-NEXT: .LBB94_11: # %else17 +; RV64ZVE32F-NEXT: .LBB104_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB94_13 +; RV64ZVE32F-NEXT: beqz a2, .LBB104_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB94_13: # %else20 +; RV64ZVE32F-NEXT: .LBB104_13: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -11403,29 +13035,29 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB94_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB104_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB94_6 -; RV64ZVE32F-NEXT: .LBB94_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB104_6 +; RV64ZVE32F-NEXT: .LBB104_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB94_7 -; RV64ZVE32F-NEXT: .LBB94_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB104_7 +; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB94_8 -; RV64ZVE32F-NEXT: j .LBB94_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB104_8 +; RV64ZVE32F-NEXT: j .LBB104_9 %eidxs = sext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) @@ -11460,34 +13092,34 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB95_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB105_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB95_11 -; RV32ZVE32F-NEXT: .LBB95_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB105_11 +; RV32ZVE32F-NEXT: .LBB105_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB95_12 -; RV32ZVE32F-NEXT: .LBB95_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB105_12 +; RV32ZVE32F-NEXT: .LBB105_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB95_13 -; RV32ZVE32F-NEXT: .LBB95_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB105_13 +; RV32ZVE32F-NEXT: .LBB105_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB95_14 -; RV32ZVE32F-NEXT: .LBB95_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB105_14 +; RV32ZVE32F-NEXT: .LBB105_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB95_15 -; RV32ZVE32F-NEXT: .LBB95_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB105_15 +; RV32ZVE32F-NEXT: .LBB105_6: # %else14 ; 
RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB95_16 -; RV32ZVE32F-NEXT: .LBB95_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB105_16 +; RV32ZVE32F-NEXT: .LBB105_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB95_9 -; RV32ZVE32F-NEXT: .LBB95_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB105_9 +; RV32ZVE32F-NEXT: .LBB105_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB95_9: # %else20 +; RV32ZVE32F-NEXT: .LBB105_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -11497,61 +13129,61 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB95_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB105_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB95_2 -; RV32ZVE32F-NEXT: .LBB95_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB105_2 +; RV32ZVE32F-NEXT: .LBB105_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB95_3 -; RV32ZVE32F-NEXT: .LBB95_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB105_3 +; RV32ZVE32F-NEXT: .LBB105_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB95_4 -; RV32ZVE32F-NEXT: .LBB95_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB105_4 +; RV32ZVE32F-NEXT: .LBB105_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB95_5 -; RV32ZVE32F-NEXT: .LBB95_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB105_5 +; RV32ZVE32F-NEXT: .LBB105_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB95_6 -; RV32ZVE32F-NEXT: .LBB95_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB105_6 +; RV32ZVE32F-NEXT: .LBB105_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB95_7 -; RV32ZVE32F-NEXT: .LBB95_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB105_7 +; RV32ZVE32F-NEXT: .LBB105_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB95_8 -; RV32ZVE32F-NEXT: j .LBB95_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB105_8 +; RV32ZVE32F-NEXT: j .LBB105_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64: ; 
RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: beqz a3, .LBB95_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB105_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -11559,9 +13191,9 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: srli a3, a3, 29 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB95_2: # %else +; RV64ZVE32F-NEXT: .LBB105_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB95_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB105_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 @@ -11570,42 +13202,42 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: srli a3, a3, 29 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB95_4: # %else2 +; RV64ZVE32F-NEXT: .LBB105_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB95_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB105_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB95_15 -; RV64ZVE32F-NEXT: .LBB95_6: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB105_15 +; RV64ZVE32F-NEXT: .LBB105_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB95_16 -; RV64ZVE32F-NEXT: .LBB95_7: # %else11 +; RV64ZVE32F-NEXT: bnez a3, .LBB105_16 +; RV64ZVE32F-NEXT: .LBB105_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB95_9 -; RV64ZVE32F-NEXT: .LBB95_8: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a3, .LBB105_9 +; RV64ZVE32F-NEXT: .LBB105_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 32 ; RV64ZVE32F-NEXT: srli a3, a3, 29 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB95_9: # %else14 +; RV64ZVE32F-NEXT: .LBB105_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB95_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB105_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 32 ; RV64ZVE32F-NEXT: srli a3, a3, 29 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa6, 0(a3) -; RV64ZVE32F-NEXT: .LBB95_11: # %else17 +; RV64ZVE32F-NEXT: .LBB105_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB95_13 +; RV64ZVE32F-NEXT: beqz a2, .LBB105_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -11613,7 +13245,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: srli a2, a2, 29 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB95_13: # %else20 +; RV64ZVE32F-NEXT: .LBB105_13: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -11623,15 +13255,15 @@ define <8 x double> 
@mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB95_14: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB105_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 32 ; RV64ZVE32F-NEXT: srli a3, a3, 29 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB95_6 -; RV64ZVE32F-NEXT: .LBB95_15: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a3, .LBB105_6 +; RV64ZVE32F-NEXT: .LBB105_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 32 @@ -11639,16 +13271,16 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB95_7 -; RV64ZVE32F-NEXT: .LBB95_16: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a3, .LBB105_7 +; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load10 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 ; RV64ZVE32F-NEXT: slli a3, a3, 32 ; RV64ZVE32F-NEXT: srli a3, a3, 29 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB95_8 -; RV64ZVE32F-NEXT: j .LBB95_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB105_8 +; RV64ZVE32F-NEXT: j .LBB105_9 %eidxs = zext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) @@ -11699,34 +13331,34 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB96_10 +; RV32ZVE32F-NEXT: bnez a3, .LBB106_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB96_11 -; RV32ZVE32F-NEXT: .LBB96_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB106_11 +; RV32ZVE32F-NEXT: .LBB106_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB96_12 -; RV32ZVE32F-NEXT: .LBB96_3: # %else5 +; RV32ZVE32F-NEXT: bnez a1, .LBB106_12 +; RV32ZVE32F-NEXT: .LBB106_3: # %else5 ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB96_13 -; RV32ZVE32F-NEXT: .LBB96_4: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB106_13 +; RV32ZVE32F-NEXT: .LBB106_4: # %else8 ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB96_14 -; RV32ZVE32F-NEXT: .LBB96_5: # %else11 +; RV32ZVE32F-NEXT: bnez a1, .LBB106_14 +; RV32ZVE32F-NEXT: .LBB106_5: # %else11 ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB96_15 -; RV32ZVE32F-NEXT: .LBB96_6: # %else14 +; RV32ZVE32F-NEXT: bnez a1, .LBB106_15 +; RV32ZVE32F-NEXT: .LBB106_6: # %else14 ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB96_16 -; RV32ZVE32F-NEXT: .LBB96_7: # %else17 +; RV32ZVE32F-NEXT: bnez a1, .LBB106_16 +; RV32ZVE32F-NEXT: .LBB106_7: # %else17 ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: beqz a1, .LBB96_9 -; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19 +; RV32ZVE32F-NEXT: beqz a1, .LBB106_9 +; RV32ZVE32F-NEXT: .LBB106_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld 
fa7, 0(a1) -; RV32ZVE32F-NEXT: .LBB96_9: # %else20 +; RV32ZVE32F-NEXT: .LBB106_9: # %else20 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa2, 16(a0) @@ -11736,88 +13368,88 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load +; RV32ZVE32F-NEXT: .LBB106_10: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB96_2 -; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1 +; RV32ZVE32F-NEXT: beqz a1, .LBB106_2 +; RV32ZVE32F-NEXT: .LBB106_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB96_3 -; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4 +; RV32ZVE32F-NEXT: beqz a1, .LBB106_3 +; RV32ZVE32F-NEXT: .LBB106_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB96_4 -; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7 +; RV32ZVE32F-NEXT: beqz a1, .LBB106_4 +; RV32ZVE32F-NEXT: .LBB106_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB96_5 -; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10 +; RV32ZVE32F-NEXT: beqz a1, .LBB106_5 +; RV32ZVE32F-NEXT: .LBB106_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB96_6 -; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13 +; RV32ZVE32F-NEXT: beqz a1, .LBB106_6 +; RV32ZVE32F-NEXT: .LBB106_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB96_7 -; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16 +; RV32ZVE32F-NEXT: beqz a1, .LBB106_7 +; RV32ZVE32F-NEXT: .LBB106_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fld fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a2, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB96_8 -; RV32ZVE32F-NEXT: j .LBB96_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB106_8 +; RV32ZVE32F-NEXT: j .LBB106_9 ; ; RV64ZVE32F-LABEL: mgather_baseidx_v8f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi a4, a3, 1 -; RV64ZVE32F-NEXT: bnez a4, .LBB96_10 +; RV64ZVE32F-NEXT: bnez a4, .LBB106_10 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a4, a3, 2 -; RV64ZVE32F-NEXT: bnez a4, .LBB96_11 -; RV64ZVE32F-NEXT: .LBB96_2: # %else2 +; RV64ZVE32F-NEXT: bnez a4, .LBB106_11 +; RV64ZVE32F-NEXT: .LBB106_2: # %else2 ; RV64ZVE32F-NEXT: andi a4, a3, 4 -; RV64ZVE32F-NEXT: bnez a4, .LBB96_12 -; RV64ZVE32F-NEXT: .LBB96_3: # %else5 +; 
RV64ZVE32F-NEXT: bnez a4, .LBB106_12 +; RV64ZVE32F-NEXT: .LBB106_3: # %else5 ; RV64ZVE32F-NEXT: andi a4, a3, 8 -; RV64ZVE32F-NEXT: bnez a4, .LBB96_13 -; RV64ZVE32F-NEXT: .LBB96_4: # %else8 +; RV64ZVE32F-NEXT: bnez a4, .LBB106_13 +; RV64ZVE32F-NEXT: .LBB106_4: # %else8 ; RV64ZVE32F-NEXT: andi a4, a3, 16 -; RV64ZVE32F-NEXT: bnez a4, .LBB96_14 -; RV64ZVE32F-NEXT: .LBB96_5: # %else11 +; RV64ZVE32F-NEXT: bnez a4, .LBB106_14 +; RV64ZVE32F-NEXT: .LBB106_5: # %else11 ; RV64ZVE32F-NEXT: andi a4, a3, 32 -; RV64ZVE32F-NEXT: bnez a4, .LBB96_15 -; RV64ZVE32F-NEXT: .LBB96_6: # %else14 +; RV64ZVE32F-NEXT: bnez a4, .LBB106_15 +; RV64ZVE32F-NEXT: .LBB106_6: # %else14 ; RV64ZVE32F-NEXT: andi a4, a3, 64 -; RV64ZVE32F-NEXT: bnez a4, .LBB96_16 -; RV64ZVE32F-NEXT: .LBB96_7: # %else17 +; RV64ZVE32F-NEXT: bnez a4, .LBB106_16 +; RV64ZVE32F-NEXT: .LBB106_7: # %else17 ; RV64ZVE32F-NEXT: andi a3, a3, -128 -; RV64ZVE32F-NEXT: beqz a3, .LBB96_9 -; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a3, .LBB106_9 +; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load19 ; RV64ZVE32F-NEXT: ld a2, 56(a2) ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) -; RV64ZVE32F-NEXT: .LBB96_9: # %else20 +; RV64ZVE32F-NEXT: .LBB106_9: # %else20 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa2, 16(a0) @@ -11827,56 +13459,56 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: fsd fa6, 48(a0) ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load +; RV64ZVE32F-NEXT: .LBB106_10: # %cond.load ; RV64ZVE32F-NEXT: ld a4, 0(a2) ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa0, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 2 -; RV64ZVE32F-NEXT: beqz a4, .LBB96_2 -; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1 +; RV64ZVE32F-NEXT: beqz a4, .LBB106_2 +; RV64ZVE32F-NEXT: .LBB106_11: # %cond.load1 ; RV64ZVE32F-NEXT: ld a4, 8(a2) ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa1, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 4 -; RV64ZVE32F-NEXT: beqz a4, .LBB96_3 -; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4 +; RV64ZVE32F-NEXT: beqz a4, .LBB106_3 +; RV64ZVE32F-NEXT: .LBB106_12: # %cond.load4 ; RV64ZVE32F-NEXT: ld a4, 16(a2) ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa2, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 8 -; RV64ZVE32F-NEXT: beqz a4, .LBB96_4 -; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a4, .LBB106_4 +; RV64ZVE32F-NEXT: .LBB106_13: # %cond.load7 ; RV64ZVE32F-NEXT: ld a4, 24(a2) ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa3, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 16 -; RV64ZVE32F-NEXT: beqz a4, .LBB96_5 -; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a4, .LBB106_5 +; RV64ZVE32F-NEXT: .LBB106_14: # %cond.load10 ; RV64ZVE32F-NEXT: ld a4, 32(a2) ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa4, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 32 -; RV64ZVE32F-NEXT: beqz a4, .LBB96_6 -; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13 +; RV64ZVE32F-NEXT: beqz a4, .LBB106_6 +; RV64ZVE32F-NEXT: .LBB106_15: # %cond.load13 ; RV64ZVE32F-NEXT: ld a4, 40(a2) ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa5, 0(a4) ; RV64ZVE32F-NEXT: andi a4, a3, 64 -; RV64ZVE32F-NEXT: beqz a4, 
.LBB96_7 -; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16 +; RV64ZVE32F-NEXT: beqz a4, .LBB106_7 +; RV64ZVE32F-NEXT: .LBB106_16: # %cond.load16 ; RV64ZVE32F-NEXT: ld a4, 48(a2) ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: fld fa6, 0(a4) ; RV64ZVE32F-NEXT: andi a3, a3, -128 -; RV64ZVE32F-NEXT: bnez a3, .LBB96_8 -; RV64ZVE32F-NEXT: j .LBB96_9 +; RV64ZVE32F-NEXT: bnez a3, .LBB106_8 +; RV64ZVE32F-NEXT: j .LBB106_9 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) ret <8 x double> %v @@ -11908,16 +13540,16 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB107_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: .LBB97_2: # %else +; RV64ZVE32F-NEXT: .LBB107_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB107_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 @@ -11927,31 +13559,31 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 -; RV64ZVE32F-NEXT: .LBB97_4: # %else2 +; RV64ZVE32F-NEXT: .LBB107_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_25 +; RV64ZVE32F-NEXT: bnez a2, .LBB107_25 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_26 -; RV64ZVE32F-NEXT: .LBB97_6: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB107_26 +; RV64ZVE32F-NEXT: .LBB107_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_8 -; RV64ZVE32F-NEXT: .LBB97_7: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB107_8 +; RV64ZVE32F-NEXT: .LBB107_7: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4 -; RV64ZVE32F-NEXT: .LBB97_8: # %else11 +; RV64ZVE32F-NEXT: .LBB107_8: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_10 +; RV64ZVE32F-NEXT: beqz a2, .LBB107_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 @@ -11961,21 +13593,21 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5 -; RV64ZVE32F-NEXT: .LBB97_10: # %else14 +; RV64ZVE32F-NEXT: .LBB107_10: # %else14 ; 
RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_27 +; RV64ZVE32F-NEXT: bnez a2, .LBB107_27 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_28 -; RV64ZVE32F-NEXT: .LBB97_12: # %else20 +; RV64ZVE32F-NEXT: bnez a2, .LBB107_28 +; RV64ZVE32F-NEXT: .LBB107_12: # %else20 ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_29 -; RV64ZVE32F-NEXT: .LBB97_13: # %else23 +; RV64ZVE32F-NEXT: bnez a2, .LBB107_29 +; RV64ZVE32F-NEXT: .LBB107_13: # %else23 ; RV64ZVE32F-NEXT: andi a2, a1, 512 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_15 -; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load25 +; RV64ZVE32F-NEXT: beqz a2, .LBB107_15 +; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load25 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -11984,23 +13616,23 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9 -; RV64ZVE32F-NEXT: .LBB97_15: # %else26 +; RV64ZVE32F-NEXT: .LBB107_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_30 +; RV64ZVE32F-NEXT: bnez a2, .LBB107_30 ; RV64ZVE32F-NEXT: # %bb.16: # %else29 ; RV64ZVE32F-NEXT: slli a2, a1, 52 -; RV64ZVE32F-NEXT: bltz a2, .LBB97_31 -; RV64ZVE32F-NEXT: .LBB97_17: # %else32 +; RV64ZVE32F-NEXT: bltz a2, .LBB107_31 +; RV64ZVE32F-NEXT: .LBB107_17: # %else32 ; RV64ZVE32F-NEXT: slli a2, a1, 51 -; RV64ZVE32F-NEXT: bltz a2, .LBB97_32 -; RV64ZVE32F-NEXT: .LBB97_18: # %else35 +; RV64ZVE32F-NEXT: bltz a2, .LBB107_32 +; RV64ZVE32F-NEXT: .LBB107_18: # %else35 ; RV64ZVE32F-NEXT: slli a2, a1, 50 -; RV64ZVE32F-NEXT: bgez a2, .LBB97_20 -; RV64ZVE32F-NEXT: .LBB97_19: # %cond.load37 +; RV64ZVE32F-NEXT: bgez a2, .LBB107_20 +; RV64ZVE32F-NEXT: .LBB107_19: # %cond.load37 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -12009,11 +13641,11 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13 -; RV64ZVE32F-NEXT: .LBB97_20: # %else38 +; RV64ZVE32F-NEXT: .LBB107_20: # %else38 ; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bgez a2, .LBB97_22 +; RV64ZVE32F-NEXT: bgez a2, .LBB107_22 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12021,10 +13653,10 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14 -; RV64ZVE32F-NEXT: .LBB97_22: # %else41 +; RV64ZVE32F-NEXT: .LBB107_22: # %else41 ; RV64ZVE32F-NEXT: lui a2, 1048568 ; RV64ZVE32F-NEXT: and a1, a1, a2 -; RV64ZVE32F-NEXT: beqz a1, .LBB97_24 +; RV64ZVE32F-NEXT: beqz a1, .LBB107_24 ; RV64ZVE32F-NEXT: # %bb.23: # %cond.load43 ; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -12034,10 +13666,10 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15 -; RV64ZVE32F-NEXT: .LBB97_24: # %else44 +; RV64ZVE32F-NEXT: .LBB107_24: # %else44 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB97_25: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB107_25: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12045,8 +13677,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_6 -; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB107_6 +; RV64ZVE32F-NEXT: .LBB107_26: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 @@ -12056,9 +13688,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_7 -; RV64ZVE32F-NEXT: j .LBB97_8 -; RV64ZVE32F-NEXT: .LBB97_27: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a2, .LBB107_7 +; RV64ZVE32F-NEXT: j .LBB107_8 +; RV64ZVE32F-NEXT: .LBB107_27: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12066,8 +13698,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6 ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_12 -; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a2, .LBB107_12 +; RV64ZVE32F-NEXT: .LBB107_28: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -12077,8 +13709,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7 ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_13 -; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load22 +; RV64ZVE32F-NEXT: beqz a2, .LBB107_13 +; RV64ZVE32F-NEXT: .LBB107_29: # %cond.load22 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12086,9 +13718,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8 ; RV64ZVE32F-NEXT: andi a2, a1, 512 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_14 -; RV64ZVE32F-NEXT: j .LBB97_15 -; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load28 +; RV64ZVE32F-NEXT: bnez a2, .LBB107_14 +; RV64ZVE32F-NEXT: j .LBB107_15 +; RV64ZVE32F-NEXT: .LBB107_30: # %cond.load28 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12096,8 +13728,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma ; 
RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10 ; RV64ZVE32F-NEXT: slli a2, a1, 52 -; RV64ZVE32F-NEXT: bgez a2, .LBB97_17 -; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31 +; RV64ZVE32F-NEXT: bgez a2, .LBB107_17 +; RV64ZVE32F-NEXT: .LBB107_31: # %cond.load31 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -12107,8 +13739,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11 ; RV64ZVE32F-NEXT: slli a2, a1, 51 -; RV64ZVE32F-NEXT: bgez a2, .LBB97_18 -; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load34 +; RV64ZVE32F-NEXT: bgez a2, .LBB107_18 +; RV64ZVE32F-NEXT: .LBB107_32: # %cond.load34 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12116,8 +13748,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12 ; RV64ZVE32F-NEXT: slli a2, a1, 50 -; RV64ZVE32F-NEXT: bltz a2, .LBB97_19 -; RV64ZVE32F-NEXT: j .LBB97_20 +; RV64ZVE32F-NEXT: bltz a2, .LBB107_19 +; RV64ZVE32F-NEXT: j .LBB107_20 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs %v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru) ret <16 x i8> %v @@ -12163,16 +13795,16 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: .LBB98_2: # %else +; RV64ZVE32F-NEXT: .LBB108_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 @@ -12182,31 +13814,31 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 -; RV64ZVE32F-NEXT: .LBB98_4: # %else2 +; RV64ZVE32F-NEXT: .LBB108_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_49 +; RV64ZVE32F-NEXT: bnez a2, .LBB108_49 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_50 -; RV64ZVE32F-NEXT: .LBB98_6: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB108_50 +; RV64ZVE32F-NEXT: .LBB108_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_8 -; RV64ZVE32F-NEXT: .LBB98_7: # %cond.load10 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_8 +; RV64ZVE32F-NEXT: .LBB108_7: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: 
vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 -; RV64ZVE32F-NEXT: .LBB98_8: # %else11 +; RV64ZVE32F-NEXT: .LBB108_8: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_10 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1 @@ -12216,21 +13848,21 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 -; RV64ZVE32F-NEXT: .LBB98_10: # %else14 +; RV64ZVE32F-NEXT: .LBB108_10: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_51 +; RV64ZVE32F-NEXT: bnez a2, .LBB108_51 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_52 -; RV64ZVE32F-NEXT: .LBB98_12: # %else20 +; RV64ZVE32F-NEXT: bnez a2, .LBB108_52 +; RV64ZVE32F-NEXT: .LBB108_12: # %else20 ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_53 -; RV64ZVE32F-NEXT: .LBB98_13: # %else23 +; RV64ZVE32F-NEXT: bnez a2, .LBB108_53 +; RV64ZVE32F-NEXT: .LBB108_13: # %else23 ; RV64ZVE32F-NEXT: andi a2, a1, 512 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_15 -; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load25 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_15 +; RV64ZVE32F-NEXT: .LBB108_14: # %cond.load25 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 @@ -12239,13 +13871,13 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v13, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 9 -; RV64ZVE32F-NEXT: .LBB98_15: # %else26 +; RV64ZVE32F-NEXT: .LBB108_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_17 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_17 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12253,9 +13885,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10 -; RV64ZVE32F-NEXT: .LBB98_17: # %else29 +; RV64ZVE32F-NEXT: .LBB108_17: # %else29 ; RV64ZVE32F-NEXT: slli a2, a1, 52 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_19 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_19 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 @@ -12265,11 +13897,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11 -; RV64ZVE32F-NEXT: .LBB98_19: # %else32 +; RV64ZVE32F-NEXT: .LBB108_19: # %else32 ; RV64ZVE32F-NEXT: slli a2, a1, 51 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: 
vslidedown.vi v8, v8, 16 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_21 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12277,9 +13909,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 12 -; RV64ZVE32F-NEXT: .LBB98_21: # %else35 +; RV64ZVE32F-NEXT: .LBB108_21: # %else35 ; RV64ZVE32F-NEXT: slli a2, a1, 50 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_23 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_23 ; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 1 @@ -12289,21 +13921,21 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13 -; RV64ZVE32F-NEXT: .LBB98_23: # %else38 +; RV64ZVE32F-NEXT: .LBB108_23: # %else38 ; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_54 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_54 ; RV64ZVE32F-NEXT: # %bb.24: # %else41 ; RV64ZVE32F-NEXT: slli a2, a1, 48 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_55 -; RV64ZVE32F-NEXT: .LBB98_25: # %else44 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_55 +; RV64ZVE32F-NEXT: .LBB108_25: # %else44 ; RV64ZVE32F-NEXT: slli a2, a1, 47 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_56 -; RV64ZVE32F-NEXT: .LBB98_26: # %else47 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_56 +; RV64ZVE32F-NEXT: .LBB108_26: # %else47 ; RV64ZVE32F-NEXT: slli a2, a1, 46 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_28 -; RV64ZVE32F-NEXT: .LBB98_27: # %cond.load49 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_28 +; RV64ZVE32F-NEXT: .LBB108_27: # %cond.load49 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 @@ -12312,31 +13944,31 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17 -; RV64ZVE32F-NEXT: .LBB98_28: # %else50 +; RV64ZVE32F-NEXT: .LBB108_28: # %else50 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: slli a2, a1, 45 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_57 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_57 ; RV64ZVE32F-NEXT: # %bb.29: # %else53 ; RV64ZVE32F-NEXT: slli a2, a1, 44 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_58 -; RV64ZVE32F-NEXT: .LBB98_30: # %else56 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_58 +; RV64ZVE32F-NEXT: .LBB108_30: # %else56 ; RV64ZVE32F-NEXT: slli a2, a1, 43 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_32 -; RV64ZVE32F-NEXT: .LBB98_31: # %cond.load58 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_32 +; RV64ZVE32F-NEXT: .LBB108_31: # %cond.load58 ; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20 -; RV64ZVE32F-NEXT: .LBB98_32: # %else59 +; RV64ZVE32F-NEXT: .LBB108_32: # %else59 ; RV64ZVE32F-NEXT: slli a2, a1, 42 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, 
m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_34 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_34 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1 @@ -12346,21 +13978,21 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21 -; RV64ZVE32F-NEXT: .LBB98_34: # %else62 +; RV64ZVE32F-NEXT: .LBB108_34: # %else62 ; RV64ZVE32F-NEXT: slli a2, a1, 41 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_59 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_59 ; RV64ZVE32F-NEXT: # %bb.35: # %else65 ; RV64ZVE32F-NEXT: slli a2, a1, 40 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_60 -; RV64ZVE32F-NEXT: .LBB98_36: # %else68 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_60 +; RV64ZVE32F-NEXT: .LBB108_36: # %else68 ; RV64ZVE32F-NEXT: slli a2, a1, 39 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_61 -; RV64ZVE32F-NEXT: .LBB98_37: # %else71 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_61 +; RV64ZVE32F-NEXT: .LBB108_37: # %else71 ; RV64ZVE32F-NEXT: slli a2, a1, 38 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_39 -; RV64ZVE32F-NEXT: .LBB98_38: # %cond.load73 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_39 +; RV64ZVE32F-NEXT: .LBB108_38: # %cond.load73 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 @@ -12369,23 +14001,23 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25 -; RV64ZVE32F-NEXT: .LBB98_39: # %else74 +; RV64ZVE32F-NEXT: .LBB108_39: # %else74 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: slli a2, a1, 37 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_62 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_62 ; RV64ZVE32F-NEXT: # %bb.40: # %else77 ; RV64ZVE32F-NEXT: slli a2, a1, 36 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_63 -; RV64ZVE32F-NEXT: .LBB98_41: # %else80 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_63 +; RV64ZVE32F-NEXT: .LBB108_41: # %else80 ; RV64ZVE32F-NEXT: slli a2, a1, 35 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_64 -; RV64ZVE32F-NEXT: .LBB98_42: # %else83 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_64 +; RV64ZVE32F-NEXT: .LBB108_42: # %else83 ; RV64ZVE32F-NEXT: slli a2, a1, 34 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_44 -; RV64ZVE32F-NEXT: .LBB98_43: # %cond.load85 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_44 +; RV64ZVE32F-NEXT: .LBB108_43: # %cond.load85 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -12394,11 +14026,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29 -; RV64ZVE32F-NEXT: .LBB98_44: # %else86 +; RV64ZVE32F-NEXT: .LBB108_44: # %else86 ; RV64ZVE32F-NEXT: slli a2, a1, 33 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_46 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_46 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88 ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12406,10 +14038,10 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30 -; RV64ZVE32F-NEXT: .LBB98_46: # %else89 +; RV64ZVE32F-NEXT: .LBB108_46: # %else89 ; RV64ZVE32F-NEXT: lui a2, 524288 ; RV64ZVE32F-NEXT: and a1, a1, a2 -; RV64ZVE32F-NEXT: beqz a1, .LBB98_48 +; RV64ZVE32F-NEXT: beqz a1, .LBB108_48 ; RV64ZVE32F-NEXT: # %bb.47: # %cond.load91 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -12420,10 +14052,10 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31 -; RV64ZVE32F-NEXT: .LBB98_48: # %else92 +; RV64ZVE32F-NEXT: .LBB108_48: # %else92 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB98_49: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB108_49: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12431,8 +14063,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_6 -; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_6 +; RV64ZVE32F-NEXT: .LBB108_50: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 @@ -12442,9 +14074,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_7 -; RV64ZVE32F-NEXT: j .LBB98_8 -; RV64ZVE32F-NEXT: .LBB98_51: # %cond.load16 +; RV64ZVE32F-NEXT: bnez a2, .LBB108_7 +; RV64ZVE32F-NEXT: j .LBB108_8 +; RV64ZVE32F-NEXT: .LBB108_51: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12452,8 +14084,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6 ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_12 -; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load19 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_12 +; RV64ZVE32F-NEXT: .LBB108_52: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 @@ -12463,8 +14095,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7 ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_13 -; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load22 +; RV64ZVE32F-NEXT: beqz a2, .LBB108_13 +; RV64ZVE32F-NEXT: .LBB108_53: # %cond.load22 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12472,9 +14104,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x 
v13, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8 ; RV64ZVE32F-NEXT: andi a2, a1, 512 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_14 -; RV64ZVE32F-NEXT: j .LBB98_15 -; RV64ZVE32F-NEXT: .LBB98_54: # %cond.load40 +; RV64ZVE32F-NEXT: bnez a2, .LBB108_14 +; RV64ZVE32F-NEXT: j .LBB108_15 +; RV64ZVE32F-NEXT: .LBB108_54: # %cond.load40 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12482,8 +14114,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14 ; RV64ZVE32F-NEXT: slli a2, a1, 48 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_25 -; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load43 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_25 +; RV64ZVE32F-NEXT: .LBB108_55: # %cond.load43 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 @@ -12493,8 +14125,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15 ; RV64ZVE32F-NEXT: slli a2, a1, 47 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_26 -; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load46 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_26 +; RV64ZVE32F-NEXT: .LBB108_56: # %cond.load46 ; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12502,9 +14134,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16 ; RV64ZVE32F-NEXT: slli a2, a1, 46 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_27 -; RV64ZVE32F-NEXT: j .LBB98_28 -; RV64ZVE32F-NEXT: .LBB98_57: # %cond.load52 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_27 +; RV64ZVE32F-NEXT: j .LBB108_28 +; RV64ZVE32F-NEXT: .LBB108_57: # %cond.load52 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12512,8 +14144,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18 ; RV64ZVE32F-NEXT: slli a2, a1, 44 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_30 -; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_30 +; RV64ZVE32F-NEXT: .LBB108_58: # %cond.load55 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 @@ -12523,9 +14155,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19 ; RV64ZVE32F-NEXT: slli a2, a1, 43 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_31 -; RV64ZVE32F-NEXT: j .LBB98_32 -; RV64ZVE32F-NEXT: .LBB98_59: # %cond.load64 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_31 +; RV64ZVE32F-NEXT: j .LBB108_32 +; RV64ZVE32F-NEXT: .LBB108_59: # %cond.load64 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12533,8 +14165,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22 ; RV64ZVE32F-NEXT: slli a2, a1, 40 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_36 -; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load67 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_36 +; RV64ZVE32F-NEXT: 
.LBB108_60: # %cond.load67 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 @@ -12544,8 +14176,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23 ; RV64ZVE32F-NEXT: slli a2, a1, 39 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_37 -; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load70 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_37 +; RV64ZVE32F-NEXT: .LBB108_61: # %cond.load70 ; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12553,9 +14185,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24 ; RV64ZVE32F-NEXT: slli a2, a1, 38 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_38 -; RV64ZVE32F-NEXT: j .LBB98_39 -; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load76 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_38 +; RV64ZVE32F-NEXT: j .LBB108_39 +; RV64ZVE32F-NEXT: .LBB108_62: # %cond.load76 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12563,8 +14195,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26 ; RV64ZVE32F-NEXT: slli a2, a1, 36 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_41 -; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_41 +; RV64ZVE32F-NEXT: .LBB108_63: # %cond.load79 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -12574,8 +14206,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27 ; RV64ZVE32F-NEXT: slli a2, a1, 35 -; RV64ZVE32F-NEXT: bgez a2, .LBB98_42 -; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load82 +; RV64ZVE32F-NEXT: bgez a2, .LBB108_42 +; RV64ZVE32F-NEXT: .LBB108_64: # %cond.load82 ; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12583,8 +14215,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28 ; RV64ZVE32F-NEXT: slli a2, a1, 34 -; RV64ZVE32F-NEXT: bltz a2, .LBB98_43 -; RV64ZVE32F-NEXT: j .LBB98_44 +; RV64ZVE32F-NEXT: bltz a2, .LBB108_43 +; RV64ZVE32F-NEXT: j .LBB108_44 %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs %v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru) ret <32 x i8> %v @@ -13136,8 +14768,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; RV32-LABEL: mgather_gather_2xSEW_unaligned: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI113_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI113_0) +; RV32-NEXT: lui a1, %hi(.LCPI123_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI123_0) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle8.v v9, (a1) ; RV32-NEXT: vluxei8.v v8, (a0), v9 @@ -13145,8 +14777,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; ; RV64V-LABEL: mgather_gather_2xSEW_unaligned: ; RV64V: # %bb.0: -; RV64V-NEXT: lui a1, %hi(.LCPI113_0) -; RV64V-NEXT: addi 
a1, a1, %lo(.LCPI113_0) +; RV64V-NEXT: lui a1, %hi(.LCPI123_0) +; RV64V-NEXT: addi a1, a1, %lo(.LCPI123_0) ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64V-NEXT: vle8.v v9, (a1) ; RV64V-NEXT: vluxei8.v v8, (a0), v9 @@ -13184,8 +14816,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; RV32-LABEL: mgather_gather_2xSEW_unaligned2: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI114_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI114_0) +; RV32-NEXT: lui a1, %hi(.LCPI124_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI124_0) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle8.v v9, (a1) ; RV32-NEXT: vluxei8.v v8, (a0), v9 @@ -13193,8 +14825,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; ; RV64V-LABEL: mgather_gather_2xSEW_unaligned2: ; RV64V: # %bb.0: -; RV64V-NEXT: lui a1, %hi(.LCPI114_0) -; RV64V-NEXT: addi a1, a1, %lo(.LCPI114_0) +; RV64V-NEXT: lui a1, %hi(.LCPI124_0) +; RV64V-NEXT: addi a1, a1, %lo(.LCPI124_0) ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64V-NEXT: vle8.v v9, (a1) ; RV64V-NEXT: vluxei8.v v8, (a0), v9 @@ -13385,8 +15017,8 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV32-LABEL: mgather_shuffle_vrgather: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI119_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI119_0) +; RV32-NEXT: lui a1, %hi(.LCPI129_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI129_0) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle16.v v9, (a1) ; RV32-NEXT: vle16.v v10, (a0) @@ -13395,8 +15027,8 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; ; RV64V-LABEL: mgather_shuffle_vrgather: ; RV64V: # %bb.0: -; RV64V-NEXT: lui a1, %hi(.LCPI119_0) -; RV64V-NEXT: addi a1, a1, %lo(.LCPI119_0) +; RV64V-NEXT: lui a1, %hi(.LCPI129_0) +; RV64V-NEXT: addi a1, a1, %lo(.LCPI129_0) ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64V-NEXT: vle16.v v9, (a1) ; RV64V-NEXT: vle16.v v10, (a0) @@ -13921,4 +15553,10 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) { } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32V-ZVFH: {{.*}} +; RV32V-ZVFHMIN: {{.*}} +; RV32ZVE32F-ZVFH: {{.*}} +; RV32ZVE32F-ZVFHMIN: {{.*}} ; RV64: {{.*}} +; RV64V-ZVFH: {{.*}} +; RV64V-ZVFHMIN: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index a445c8fe08172..fcddec226ceab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1,12 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZVE32F +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN + +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN declare void @llvm.masked.scatter.v1i8.v1p0(<1 x i8>, <1 x ptr>, i32, <1 x i1>) @@ -17,11 +26,11 @@ define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v1i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v1i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v1i8: ; RV32ZVE32F: # %bb.0: @@ -52,11 +61,11 @@ define void 
@mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i8: ; RV32ZVE32F: # %bb.0: @@ -97,12 +106,12 @@ define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i16_truncstore_v2i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i16_truncstore_v2i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8: ; RV32ZVE32F: # %bb.0: @@ -148,14 +157,14 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i32_truncstore_v2i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i32_truncstore_v2i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8: ; RV32ZVE32F: # %bb.0: @@ -207,16 +216,16 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i64_truncstore_v2i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i64_truncstore_v2i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8: ; RV32ZVE32F: # %bb.0: @@ -267,11 +276,11 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v4i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v4i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: 
mscatter_v4i8: ; RV64ZVE32F: # %bb.0: @@ -327,11 +336,11 @@ define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) { ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_truemask_v4i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10 -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_truemask_v4i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10 +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8: ; RV64ZVE32F: # %bb.0: @@ -369,11 +378,11 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v8i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v8i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v8i8: ; RV64ZVE32F: # %bb.0: @@ -471,13 +480,13 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v9 -; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v9 +; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8: ; RV64ZVE32F: # %bb.0: @@ -593,11 +602,11 @@ define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v1i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v1i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v1i16: ; RV32ZVE32F: # %bb.0: @@ -628,11 +637,11 @@ define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i16: ; RV32ZVE32F: # %bb.0: @@ -673,12 +682,12 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i32_truncstore_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i32_truncstore_v2i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsoxei64.v v8, (zero), 
v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16: ; RV32ZVE32F: # %bb.0: @@ -725,14 +734,14 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i64_truncstore_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i64_truncstore_v2i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16: ; RV32ZVE32F: # %bb.0: @@ -785,11 +794,11 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v4i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v4i16: ; RV64ZVE32F: # %bb.0: @@ -845,11 +854,11 @@ define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) { ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_truemask_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10 -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_truemask_v4i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10 +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16: ; RV64ZVE32F: # %bb.0: @@ -887,11 +896,11 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v8i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v8i16: ; RV64ZVE32F: # %bb.0: @@ -990,14 +999,14 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i8_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i8_v8i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i16: ; RV64ZVE32F: # %bb.0: @@ -1123,14 +1132,14 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i16: ; RV64ZVE32F: # %bb.0: @@ -1256,13 +1265,13 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vwaddu.vv v10, v9, v9 -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwaddu.vv v10, v9, v9 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16: ; RV64ZVE32F: # %bb.0: @@ -1395,14 +1404,14 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16: ; RV64ZVE32F: # %bb.0: @@ -1526,11 +1535,11 @@ define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v1i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v1i32: ; RV32ZVE32F: # %bb.0: @@ -1561,11 +1570,11 @@ define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i32: ; RV32ZVE32F: # %bb.0: @@ -1606,12 +1615,12 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, 
<2 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i64_truncstore_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i64_truncstore_v2i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64V-NEXT: vnsrl.wi v8, v8, 0 +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32: ; RV32ZVE32F: # %bb.0: @@ -1662,11 +1671,11 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v4i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v4i32: ; RV64ZVE32F: # %bb.0: @@ -1722,11 +1731,11 @@ define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) { ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_truemask_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10 -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_truemask_v4i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10 +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32: ; RV64ZVE32F: # %bb.0: @@ -1764,11 +1773,11 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v8i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v8i32: ; RV64ZVE32F: # %bb.0: @@ -1870,14 +1879,14 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i8_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i8_v8i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i32: ; RV64ZVE32F: # %bb.0: @@ -2007,14 +2016,14 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: 
mscatter_baseidx_sext_v8i8_v8i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i32: ; RV64ZVE32F: # %bb.0: @@ -2146,14 +2155,14 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v11, v10 -; RV64-NEXT: vsll.vi v10, v11, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v11, v10 +; RV64V-NEXT: vsll.vi v10, v11, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32: ; RV64ZVE32F: # %bb.0: @@ -2292,14 +2301,14 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i16_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i16_v8i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i32: ; RV64ZVE32F: # %bb.0: @@ -2430,14 +2439,14 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i32: ; RV64ZVE32F: # %bb.0: @@ -2569,13 +2578,13 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v12, v10 -; RV64-NEXT: vsll.vi v10, v12, 2 -; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v12, v10 +; RV64V-NEXT: vsll.vi v10, v12, 2 +; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV64V-NEXT: 
ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32: ; RV64ZVE32F: # %bb.0: @@ -2716,14 +2725,14 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf2 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32: ; RV64ZVE32F: # %bb.0: @@ -2849,11 +2858,11 @@ define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v1i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v1i64: ; RV32ZVE32F: # %bb.0: @@ -2890,11 +2899,11 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2i64: ; RV32ZVE32F: # %bb.0: @@ -2957,11 +2966,11 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v4i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v4i64: ; RV32ZVE32F: # %bb.0: @@ -3070,11 +3079,11 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v10 ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_truemask_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10 -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_truemask_v4i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10 +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_truemask_v4i64: ; RV32ZVE32F: # %bb.0: @@ -3140,11 +3149,11 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v8i64: ; RV32ZVE32F: # %bb.0: @@ -3368,13 +3377,13 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i8_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i8_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64: ; RV32ZVE32F: # %bb.0: @@ -3612,13 +3621,13 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64: ; RV32ZVE32F: # %bb.0: @@ -3857,14 +3866,14 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v13, v12 -; RV64-NEXT: vsll.vi v12, v13, 3 -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v13, v12 +; RV64V-NEXT: vsll.vi v12, v13, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV32ZVE32F: # %bb.0: @@ -4111,13 +4120,13 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i16_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i16_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: @@ -4356,13 +4365,13 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: 
vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: @@ -4602,14 +4611,14 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v14, v12 -; RV64-NEXT: vsll.vi v12, v14, 3 -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v14, v12 +; RV64V-NEXT: vsll.vi v12, v14, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: @@ -4858,13 +4867,13 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i32_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i32_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf2 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64: ; RV32ZVE32F: # %bb.0: @@ -5101,13 +5110,13 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf2 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64: ; RV32ZVE32F: # %bb.0: @@ -5345,13 +5354,13 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vzext.vf2 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64: ; RV32ZVE32F: # %bb.0: @@ -5598,12 +5607,12 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 
x i64> %idxs, ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsll.vi v12, v12, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsll.vi v12, v12, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64: ; RV32ZVE32F: # %bb.0: @@ -5874,63 +5883,65 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ret void } -declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>) +declare void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat>, <1 x ptr>, i32, <1 x i1>) -define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) { -; RV32V-LABEL: mscatter_v1f16: +define void @mscatter_v1bf16(<1 x bfloat> %val, <1 x ptr> %ptrs, <1 x i1> %m) { +; RV32V-LABEL: mscatter_v1bf16: ; RV32V: # %bb.0: ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v1f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v1bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; -; RV32ZVE32F-LABEL: mscatter_v1f16: +; RV32ZVE32F-LABEL: mscatter_v1bf16: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32ZVE32F-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_v1f16: +; RV64ZVE32F-LABEL: mscatter_v1bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vfirst.m a1, v0 ; RV64ZVE32F-NEXT: bnez a1, .LBB52_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: .LBB52_2: # %else ; RV64ZVE32F-NEXT: ret - call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m) + call void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m) ret void } -declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat>, <2 x ptr>, i32, <2 x i1>) -define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) { -; RV32V-LABEL: mscatter_v2f16: +define void @mscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m) { +; RV32V-LABEL: mscatter_v2bf16: ; RV32V: # %bb.0: ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; -; RV32ZVE32F-LABEL: mscatter_v2f16: +; RV32ZVE32F-LABEL: mscatter_v2bf16: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, 
mf2, ta, ma ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32ZVE32F-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_v2f16: +; RV64ZVE32F-LABEL: mscatter_v2bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 @@ -5942,35 +5953,39 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; RV64ZVE32F-NEXT: .LBB53_2: # %else2 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB53_2 ; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a1) ; RV64ZVE32F-NEXT: ret - call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m) + call void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m) ret void } -declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat>, <4 x ptr>, i32, <4 x i1>) -define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) { -; RV32-LABEL: mscatter_v4f16: +define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) { +; RV32-LABEL: mscatter_v4bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v4f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v4bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_v4f16: +; RV64ZVE32F-LABEL: mscatter_v4bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a4, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 16(a0) @@ -5992,87 +6007,103 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a5, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a5 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: andi a0, a3, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB54_2 ; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a4) +; RV64ZVE32F-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a4) ; RV64ZVE32F-NEXT: andi a0, a3, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB54_3 ; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a3, a3, 8 ; RV64ZVE32F-NEXT: 
beqz a3, .LBB54_4 ; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a1) ; RV64ZVE32F-NEXT: ret - call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m) + call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m) ret void } -define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { -; RV32-LABEL: mscatter_truemask_v4f16: +define void @mscatter_truemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) { +; RV32-LABEL: mscatter_truemask_v4bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_truemask_v4f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10 -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_truemask_v4bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10 +; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_truemask_v4f16: +; RV64ZVE32F-LABEL: mscatter_truemask_v4bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 0(a0) ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: ld a3, 16(a0) ; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-NEXT: vmv.x.s a4, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a4 +; RV64ZVE32F-NEXT: fsh fa5, 0(a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: ret - call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) + call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void } -define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { -; CHECK-LABEL: mscatter_falsemask_v4f16: +define void @mscatter_falsemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) { +; CHECK-LABEL: mscatter_falsemask_v4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: ret - call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer) + call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer) ret void } -declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat>, <8 x ptr>, i32, <8 x i1>) -define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) { -; RV32-LABEL: mscatter_v8f16: +define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) { +; RV32-LABEL: mscatter_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v8f16: -; 
RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v8bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_v8f16: +; RV64ZVE32F-LABEL: mscatter_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a3, 40(a0) ; RV64ZVE32F-NEXT: ld a2, 48(a0) @@ -6110,57 +6141,73 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s t1, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, t1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_2 ; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (t0) +; RV64ZVE32F-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(t0) ; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_3 ; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a7) +; RV64ZVE32F-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a7) ; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_4 ; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v9, (a6) +; RV64ZVE32F-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a6) ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_5 ; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vse16.v v9, (a5) +; RV64ZVE32F-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a5) ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_6 ; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a3) ; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_7 ; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_8 ; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV64ZVE32F-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-NEXT: fsh fa5, 0(a1) ; RV64ZVE32F-NEXT: ret - call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + call void 
@llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) ret void } -define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { -; RV32-LABEL: mscatter_baseidx_v8i8_v8f16: +define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { +; RV32-LABEL: mscatter_baseidx_v8i8_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vsext.vf4 v10, v9 @@ -6169,16 +6216,16 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i8_v8f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i8_v8bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f16: +; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -6188,8 +6235,10 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB58_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_4 @@ -6201,7 +6250,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB58_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 @@ -6226,7 +6277,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB58_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -6243,7 +6296,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v11, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_6 ; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5 @@ -6254,7 
+6309,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_7 ; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7 @@ -6264,7 +6321,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_8 ; RV64ZVE32F-NEXT: j .LBB58_9 @@ -6274,7 +6333,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV64ZVE32F-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB58_11 ; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13 @@ -6285,15 +6346,17 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: ret - %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs - call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs + call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) ret void } -define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { -; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16: +define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { +; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vsext.vf4 v10, v9 @@ -6302,16 +6365,16 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f16: +; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, 
e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -6321,8 +6384,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB59_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_4 @@ -6334,7 +6399,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB59_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 @@ -6359,7 +6426,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB59_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -6376,7 +6445,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v11, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_6 ; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5 @@ -6387,7 +6458,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_7 ; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7 @@ -6397,7 +6470,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_8 ; RV64ZVE32F-NEXT: j .LBB59_9 @@ -6407,7 +6482,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV64ZVE32F-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB59_11 ; 
RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13 @@ -6418,16 +6495,18 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i16> - %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs - call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs + call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) ret void } -define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { -; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16: +define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { +; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vwaddu.vv v10, v9, v9 @@ -6435,15 +6514,15 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vwaddu.vv v10, v9, v9 -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwaddu.vv v10, v9, v9 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f16: +; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 @@ -6454,8 +6533,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB60_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_4 @@ -6468,7 +6549,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB60_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 @@ -6494,7 +6577,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s 
a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB60_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -6512,7 +6597,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v11, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_6 ; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5 @@ -6524,7 +6611,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_7 ; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7 @@ -6535,7 +6624,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_8 ; RV64ZVE32F-NEXT: j .LBB60_9 @@ -6546,7 +6637,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV64ZVE32F-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB60_11 ; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13 @@ -6558,43 +6651,47 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> - %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs - call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs + call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) ret void } -define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { -; RV32-LABEL: mscatter_baseidx_v8f16: +define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { +; RV32-LABEL: mscatter_baseidx_v8bf16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vwadd.vv v10, v9, v9 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v12, v9 -; RV64-NEXT: 
vadd.vv v12, v12, v12 -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8bf16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_baseidx_v8f16: +; RV64ZVE32F-LABEL: mscatter_baseidx_v8bf16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB61_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_4 @@ -6606,7 +6703,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB61_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 @@ -6631,7 +6730,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB61_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -6648,7 +6749,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v11, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_6 ; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5 @@ -6659,7 +6762,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_7 ; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7 @@ -6668,7 +6773,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; 
RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_8 ; RV64ZVE32F-NEXT: j .LBB61_9 @@ -6678,7 +6785,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV64ZVE32F-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB61_11 ; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13 @@ -6689,172 +6798,1722 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-NEXT: fsh fa5, 0(a0) ; RV64ZVE32F-NEXT: ret - %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs - call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs + call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) ret void } -declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>) +declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>) -define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) { -; RV32V-LABEL: mscatter_v1f32: +define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) { +; RV32V-LABEL: mscatter_v1f16: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v1f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v1f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; -; RV32ZVE32F-LABEL: mscatter_v1f32: +; RV32ZVE32F-LABEL: mscatter_v1f16: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32ZVE32F-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_v1f32: -; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vfirst.m a1, v0 -; RV64ZVE32F-NEXT: bnez a1, .LBB62_2 -; RV64ZVE32F-NEXT: # %bb.1: # %cond.store -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: .LBB62_2: # %else -; RV64ZVE32F-NEXT: ret - call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m) +; RV64ZVE32F-ZVFH-LABEL: mscatter_v1f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfirst.m a1, v0 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB62_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: .LBB62_2: # %else +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v1f16: +; 
RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vfirst.m a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB62_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB62_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: ret + call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m) ret void } -declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>) -define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) { -; RV32V-LABEL: mscatter_v2f32: +define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) { +; RV32V-LABEL: mscatter_v2f16: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; -; RV32ZVE32F-LABEL: mscatter_v2f32: +; RV32ZVE32F-LABEL: mscatter_v2f16: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32ZVE32F-NEXT: ret ; -; RV64ZVE32F-LABEL: mscatter_v2f32: -; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a2, v0 -; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: bnez a3, .LBB63_3 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a2, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB63_4 -; RV64ZVE32F-NEXT: .LBB63_2: # %else2 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB63_3: # %cond.store -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: andi a2, a2, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB63_2 -; RV64ZVE32F-NEXT: .LBB63_4: # %cond.store1 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v8, (a1) -; RV64ZVE32F-NEXT: ret - call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m) +; RV64ZVE32F-ZVFH-LABEL: mscatter_v2f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB63_3 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB63_4 +; RV64ZVE32F-ZVFH-NEXT: .LBB63_2: # %else2 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB63_3: # %cond.store +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB63_2 +; RV64ZVE32F-ZVFH-NEXT: .LBB63_4: # %cond.store1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; 
RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v2f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB63_3 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB63_4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_2: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_3: # %cond.store +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB63_2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1) +; RV64ZVE32F-ZVFHMIN-NEXT: ret + call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m) ret void } -declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>) -define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) { -; RV32-LABEL: mscatter_v4f32: +define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) { +; RV32-LABEL: mscatter_v4f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v4f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t -; RV64-NEXT: ret -; -; RV64ZVE32F-LABEL: mscatter_v4f32: -; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v0 -; RV64ZVE32F-NEXT: andi a5, a3, 1 -; RV64ZVE32F-NEXT: bnez a5, .LBB64_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB64_6 -; RV64ZVE32F-NEXT: .LBB64_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB64_7 -; RV64ZVE32F-NEXT: .LBB64_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB64_8 -; RV64ZVE32F-NEXT: .LBB64_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB64_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB64_2 -; RV64ZVE32F-NEXT: .LBB64_6: # %cond.store1 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB64_3 -; RV64ZVE32F-NEXT: .LBB64_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse32.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB64_4 -; RV64ZVE32F-NEXT: 
.LBB64_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse32.v v8, (a1) -; RV64ZVE32F-NEXT: ret - call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m) +; RV64V-LABEL: mscatter_v4f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mscatter_v4f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: ld a4, 8(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a1, 24(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a3, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a5, a3, 1 +; RV64ZVE32F-ZVFH-NEXT: bnez a5, .LBB64_5 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB64_2: # %else2 +; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB64_3: # %else4 +; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB64_8 +; RV64ZVE32F-ZVFH-NEXT: .LBB64_4: # %else6 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB64_5: # %cond.store +; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_2 +; RV64ZVE32F-ZVFH-NEXT: .LBB64_6: # %cond.store1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a4) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_3 +; RV64ZVE32F-ZVFH-NEXT: .LBB64_7: # %cond.store3 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a3, .LBB64_4 +; RV64ZVE32F-ZVFH-NEXT: .LBB64_8: # %cond.store5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v4f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: ld a4, 8(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 24(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a5, a3, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a5, .LBB64_5 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_2: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_3: # %else4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB64_8 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_4: # %else6 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_5: # %cond.store +; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a5, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a5 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 
2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a4) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_3 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a3, .LBB64_4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1) +; RV64ZVE32F-ZVFHMIN-NEXT: ret + call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m) ret void } -define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { -; RV32-LABEL: mscatter_truemask_v4f32: +define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { +; RV32-LABEL: mscatter_truemask_v4f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_truemask_v4f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10 -; RV64-NEXT: ret -; -; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: -; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64V-LABEL: mscatter_truemask_v4f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10 +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mscatter_truemask_v4f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: ld a1, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a3, 16(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_truemask_v4f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 16(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a4 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1) +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 
0(a3) +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ret + call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) + ret void +} + +define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { +; CHECK-LABEL: mscatter_falsemask_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer) + ret void +} + +declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>) + +define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) { +; RV32-LABEL: mscatter_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64V-LABEL: mscatter_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mscatter_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: ld a3, 40(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a1, 56(a0) +; RV64ZVE32F-ZVFH-NEXT: ld t0, 8(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a7, 16(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a6, 24(a0) +; RV64ZVE32F-ZVFH-NEXT: ld a5, 32(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a4, v0 +; RV64ZVE32F-ZVFH-NEXT: andi t1, a4, 1 +; RV64ZVE32F-ZVFH-NEXT: bnez t1, .LBB67_9 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_10 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_2: # %else2 +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_3: # %else4 +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_12 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_4: # %else6 +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_5: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_6: # %else10 +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_15 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_7: # %else12 +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_8: # %else14 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB67_9: # %cond.store +; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_2 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_10: # %cond.store1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (t0) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_3 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_11: # %cond.store3 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a7) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_4 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_12: # %cond.store5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 
1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a6) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_5 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_13: # %cond.store7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a5) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_14: # %cond.store9 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_15: # %cond.store11 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 6 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_8 +; RV64ZVE32F-ZVFH-NEXT: .LBB67_16: # %cond.store13 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 40(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 56(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld t0, 8(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a7, 16(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a6, 24(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ld a5, 32(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi t1, a4, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez t1, .LBB67_9 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_10 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_2: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_3: # %else4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_12 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_4: # %else6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_5: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_6: # %else10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_15 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_7: # %else12 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_8: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_9: # %cond.store +; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s t1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, t1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_2 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: 
fsh fa5, 0(t0) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_3 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a7) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a6) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_5 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a5) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_8 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1) +; RV64ZVE32F-ZVFHMIN-NEXT: ret + call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + ret void +} + +define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { +; RV32-LABEL: mscatter_baseidx_v8i8_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsext.vf4 v10, v9 +; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: ret +; +; RV64V-LABEL: mscatter_baseidx_v8i8_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; 
RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB68_2: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_4 +; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB68_4: # %else2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_12 +; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB68_6: # %else6 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB68_7: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB68_8: # %cond.store9 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB68_9: # %else10 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB68_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB68_11: # %else14 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB68_12: # %cond.store3 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB68_13: # %cond.store5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB68_14: # %cond.store7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; 
RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_8 +; RV64ZVE32F-ZVFH-NEXT: j .LBB68_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB68_15: # %cond.store11 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB68_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB68_16: # %cond.store13 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_4 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_4: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_12 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_7: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_8: # %cond.store9 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; 
RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_9: # %else10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB68_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_11: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %cond.store3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %cond.store5 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %cond.store7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_8 +; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB68_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %ptrs = getelementptr inbounds 
half, ptr %base, <8 x i8> %idxs + call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + ret void +} + +define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { +; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsext.vf4 v10, v9 +; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: ret +; +; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_4 +; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %else2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_12 +; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_6: # %else6 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_7: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_8: # %cond.store9 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB69_9: # %else10 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB69_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_11: # %else14 +; RV64ZVE32F-ZVFH-NEXT: 
ret +; RV64ZVE32F-ZVFH-NEXT: .LBB69_12: # %cond.store3 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_13: # %cond.store5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_14: # %cond.store7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_8 +; RV64ZVE32F-ZVFH-NEXT: j .LBB69_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_15: # %cond.store11 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB69_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB69_16: # %cond.store13 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_4 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10 +; 
RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_12 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_7: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_8: # %cond.store9 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_9: # %else10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_11: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %cond.store3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %cond.store5 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %cond.store7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, 
a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_8 +; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB69_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %eidxs = sext <8 x i8> %idxs to <8 x i16> + %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs + call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + ret void +} + +define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { +; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV32-NEXT: ret +; +; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwaddu.vv v10, v9, v9 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_4 +; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2 +; 
RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_12 +; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %else6 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.store9 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB70_9: # %else10 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_11: # %else14 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB70_12: # %cond.store3 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_13: # %cond.store5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_14: # %cond.store7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_8 +; RV64ZVE32F-ZVFH-NEXT: j .LBB70_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_15: # %cond.store11 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB70_16: # %cond.store13 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9 +; 
RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_4 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_12 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.store9 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_9: # %else10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_11: # %else14 +; 
RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %cond.store3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %cond.store5 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %cond.store7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_8 +; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %eidxs = zext <8 x i8> %idxs to <8 x i16> + %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs + call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + ret void +} + +define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { +; RV32-LABEL: mscatter_baseidx_v8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; 
RV32-NEXT: vwadd.vv v10, v9, v9 +; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: ret +; +; RV64V-LABEL: mscatter_baseidx_v8f16: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v12, v9 +; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret +; +; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8f16: +; RV64ZVE32F-ZVFH: # %bb.0: +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_2 +; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB71_2: # %else +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_4 +; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB71_4: # %else2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_12 +; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_13 +; RV64ZVE32F-ZVFH-NEXT: .LBB71_6: # %else6 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_14 +; RV64ZVE32F-ZVFH-NEXT: .LBB71_7: # %else8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB71_8: # %cond.store9 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: .LBB71_9: # %else10 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB71_16 +; RV64ZVE32F-ZVFH-NEXT: .LBB71_11: # %else14 +; RV64ZVE32F-ZVFH-NEXT: ret +; RV64ZVE32F-ZVFH-NEXT: .LBB71_12: # %cond.store3 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_6 +; RV64ZVE32F-ZVFH-NEXT: .LBB71_13: # %cond.store5 +; 
RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_7 +; RV64ZVE32F-ZVFH-NEXT: .LBB71_14: # %cond.store7 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_8 +; RV64ZVE32F-ZVFH-NEXT: j .LBB71_9 +; RV64ZVE32F-ZVFH-NEXT: .LBB71_15: # %cond.store11 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2) +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB71_11 +; RV64ZVE32F-ZVFH-NEXT: .LBB71_16: # %cond.store13 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-ZVFH-NEXT: ret +; +; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8f16: +; RV64ZVE32F-ZVFHMIN: # %bb.0: +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_2 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_2: # %else +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_4 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_4: # %else2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_12 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4 +; RV64ZVE32F-ZVFHMIN-NEXT: andi 
a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_13 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else6 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_14 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_7: # %else8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_8: # %cond.store9 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_9: # %else10 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB71_16 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_11: # %else14 +; RV64ZVE32F-ZVFHMIN-NEXT: ret +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %cond.store3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %cond.store5 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %cond.store7 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_8 +; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_9 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 +; 
RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB71_11 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: ret + %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs + call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m) + ret void +} + +declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>) + +define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) { +; RV32V-LABEL: mscatter_v1f32: +; RV32V: # %bb.0: +; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32V-NEXT: ret +; +; RV64V-LABEL: mscatter_v1f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret +; +; RV32ZVE32F-LABEL: mscatter_v1f32: +; RV32ZVE32F: # %bb.0: +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32ZVE32F-NEXT: ret +; +; RV64ZVE32F-LABEL: mscatter_v1f32: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vfirst.m a1, v0 +; RV64ZVE32F-NEXT: bnez a1, .LBB72_2 +; RV64ZVE32F-NEXT: # %bb.1: # %cond.store +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v8, (a0) +; RV64ZVE32F-NEXT: .LBB72_2: # %else +; RV64ZVE32F-NEXT: ret + call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m) + ret void +} + +declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>) + +define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) { +; RV32V-LABEL: mscatter_v2f32: +; RV32V: # %bb.0: +; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32V-NEXT: ret +; +; RV64V-LABEL: mscatter_v2f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret +; +; RV32ZVE32F-LABEL: mscatter_v2f32: +; RV32ZVE32F: # %bb.0: +; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32ZVE32F-NEXT: ret +; +; RV64ZVE32F-LABEL: mscatter_v2f32: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a2, v0 +; RV64ZVE32F-NEXT: andi a3, a2, 1 +; RV64ZVE32F-NEXT: bnez a3, .LBB73_3 +; RV64ZVE32F-NEXT: # %bb.1: # %else +; RV64ZVE32F-NEXT: andi a2, a2, 2 +; RV64ZVE32F-NEXT: bnez a2, .LBB73_4 +; RV64ZVE32F-NEXT: .LBB73_2: # %else2 +; RV64ZVE32F-NEXT: ret +; RV64ZVE32F-NEXT: .LBB73_3: # %cond.store +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v8, (a0) +; RV64ZVE32F-NEXT: andi a2, a2, 2 +; RV64ZVE32F-NEXT: beqz a2, .LBB73_2 +; RV64ZVE32F-NEXT: .LBB73_4: # %cond.store1 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vse32.v v8, (a1) +; RV64ZVE32F-NEXT: ret + call void @llvm.masked.scatter.v2f32.v2p0(<2 x 
float> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m) + ret void +} + +declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) + +define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) { +; RV32-LABEL: mscatter_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64V-LABEL: mscatter_v4f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64V-NEXT: ret +; +; RV64ZVE32F-LABEL: mscatter_v4f32: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: ld a4, 8(a0) +; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a3, v0 +; RV64ZVE32F-NEXT: andi a5, a3, 1 +; RV64ZVE32F-NEXT: bnez a5, .LBB74_5 +; RV64ZVE32F-NEXT: # %bb.1: # %else +; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: bnez a0, .LBB74_6 +; RV64ZVE32F-NEXT: .LBB74_2: # %else2 +; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: bnez a0, .LBB74_7 +; RV64ZVE32F-NEXT: .LBB74_3: # %else4 +; RV64ZVE32F-NEXT: andi a3, a3, 8 +; RV64ZVE32F-NEXT: bnez a3, .LBB74_8 +; RV64ZVE32F-NEXT: .LBB74_4: # %else6 +; RV64ZVE32F-NEXT: ret +; RV64ZVE32F-NEXT: .LBB74_5: # %cond.store +; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v8, (a0) +; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: beqz a0, .LBB74_2 +; RV64ZVE32F-NEXT: .LBB74_6: # %cond.store1 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 +; RV64ZVE32F-NEXT: vse32.v v9, (a4) +; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: beqz a0, .LBB74_3 +; RV64ZVE32F-NEXT: .LBB74_7: # %cond.store3 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse32.v v9, (a2) +; RV64ZVE32F-NEXT: andi a3, a3, 8 +; RV64ZVE32F-NEXT: beqz a3, .LBB74_4 +; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 +; RV64ZVE32F-NEXT: vse32.v v8, (a1) +; RV64ZVE32F-NEXT: ret + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m) + ret void +} + +define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { +; RV32-LABEL: mscatter_truemask_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9 +; RV32-NEXT: ret +; +; RV64V-LABEL: mscatter_truemask_v4f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10 +; RV64V-NEXT: ret +; +; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: ld a1, 0(a0) ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: ld a3, 16(a0) ; RV64ZVE32F-NEXT: ld a0, 24(a0) @@ -6888,11 +8547,11 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v8f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v8f32: ; RV64ZVE32F: # %bb.0: @@ -6906,76 +8565,76 @@ 
define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 ; RV64ZVE32F-NEXT: andi t1, a4, 1 -; RV64ZVE32F-NEXT: bnez t1, .LBB67_9 +; RV64ZVE32F-NEXT: bnez t1, .LBB77_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a4, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB67_10 -; RV64ZVE32F-NEXT: .LBB67_2: # %else2 +; RV64ZVE32F-NEXT: bnez a0, .LBB77_10 +; RV64ZVE32F-NEXT: .LBB77_2: # %else2 ; RV64ZVE32F-NEXT: andi a0, a4, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB67_11 -; RV64ZVE32F-NEXT: .LBB67_3: # %else4 +; RV64ZVE32F-NEXT: bnez a0, .LBB77_11 +; RV64ZVE32F-NEXT: .LBB77_3: # %else4 ; RV64ZVE32F-NEXT: andi a0, a4, 8 -; RV64ZVE32F-NEXT: bnez a0, .LBB67_12 -; RV64ZVE32F-NEXT: .LBB67_4: # %else6 +; RV64ZVE32F-NEXT: bnez a0, .LBB77_12 +; RV64ZVE32F-NEXT: .LBB77_4: # %else6 ; RV64ZVE32F-NEXT: andi a0, a4, 16 -; RV64ZVE32F-NEXT: bnez a0, .LBB67_13 -; RV64ZVE32F-NEXT: .LBB67_5: # %else8 +; RV64ZVE32F-NEXT: bnez a0, .LBB77_13 +; RV64ZVE32F-NEXT: .LBB77_5: # %else8 ; RV64ZVE32F-NEXT: andi a0, a4, 32 -; RV64ZVE32F-NEXT: bnez a0, .LBB67_14 -; RV64ZVE32F-NEXT: .LBB67_6: # %else10 +; RV64ZVE32F-NEXT: bnez a0, .LBB77_14 +; RV64ZVE32F-NEXT: .LBB77_6: # %else10 ; RV64ZVE32F-NEXT: andi a0, a4, 64 -; RV64ZVE32F-NEXT: bnez a0, .LBB67_15 -; RV64ZVE32F-NEXT: .LBB67_7: # %else12 +; RV64ZVE32F-NEXT: bnez a0, .LBB77_15 +; RV64ZVE32F-NEXT: .LBB77_7: # %else12 ; RV64ZVE32F-NEXT: andi a0, a4, -128 -; RV64ZVE32F-NEXT: bnez a0, .LBB67_16 -; RV64ZVE32F-NEXT: .LBB67_8: # %else14 +; RV64ZVE32F-NEXT: bnez a0, .LBB77_16 +; RV64ZVE32F-NEXT: .LBB77_8: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB67_9: # %cond.store +; RV64ZVE32F-NEXT: .LBB77_9: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a0) ; RV64ZVE32F-NEXT: andi a0, a4, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB67_2 -; RV64ZVE32F-NEXT: .LBB67_10: # %cond.store1 +; RV64ZVE32F-NEXT: beqz a0, .LBB77_2 +; RV64ZVE32F-NEXT: .LBB77_10: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v10, (t0) ; RV64ZVE32F-NEXT: andi a0, a4, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB67_3 -; RV64ZVE32F-NEXT: .LBB67_11: # %cond.store3 +; RV64ZVE32F-NEXT: beqz a0, .LBB77_3 +; RV64ZVE32F-NEXT: .LBB77_11: # %cond.store3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v10, (a7) ; RV64ZVE32F-NEXT: andi a0, a4, 8 -; RV64ZVE32F-NEXT: beqz a0, .LBB67_4 -; RV64ZVE32F-NEXT: .LBB67_12: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a0, .LBB77_4 +; RV64ZVE32F-NEXT: .LBB77_12: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a6) ; RV64ZVE32F-NEXT: andi a0, a4, 16 -; RV64ZVE32F-NEXT: beqz a0, .LBB67_5 -; RV64ZVE32F-NEXT: .LBB67_13: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a0, .LBB77_5 +; RV64ZVE32F-NEXT: .LBB77_13: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a5) ; RV64ZVE32F-NEXT: andi a0, a4, 32 -; RV64ZVE32F-NEXT: beqz a0, .LBB67_6 -; RV64ZVE32F-NEXT: .LBB67_14: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a0, .LBB77_6 +; RV64ZVE32F-NEXT: .LBB77_14: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; 
RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a3) ; RV64ZVE32F-NEXT: andi a0, a4, 64 -; RV64ZVE32F-NEXT: beqz a0, .LBB67_7 -; RV64ZVE32F-NEXT: .LBB67_15: # %cond.store11 +; RV64ZVE32F-NEXT: beqz a0, .LBB77_7 +; RV64ZVE32F-NEXT: .LBB77_15: # %cond.store11 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a0, a4, -128 -; RV64ZVE32F-NEXT: beqz a0, .LBB67_8 -; RV64ZVE32F-NEXT: .LBB67_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a0, .LBB77_8 +; RV64ZVE32F-NEXT: .LBB77_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -6994,30 +8653,30 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i8_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i8_v8f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB68_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB78_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a2) -; RV64ZVE32F-NEXT: .LBB68_2: # %else +; RV64ZVE32F-NEXT: .LBB78_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB68_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB78_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 @@ -7027,23 +8686,23 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB68_4: # %else2 +; RV64ZVE32F-NEXT: .LBB78_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB68_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB78_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB68_13 -; RV64ZVE32F-NEXT: .LBB68_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB78_13 +; RV64ZVE32F-NEXT: .LBB78_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB68_14 -; RV64ZVE32F-NEXT: .LBB68_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB78_14 +; RV64ZVE32F-NEXT: .LBB78_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; 
RV64ZVE32F-NEXT: beqz a2, .LBB68_9 -; RV64ZVE32F-NEXT: .LBB68_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB78_9 +; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7053,17 +8712,17 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB68_9: # %else10 +; RV64ZVE32F-NEXT: .LBB78_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB68_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB78_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB68_16 -; RV64ZVE32F-NEXT: .LBB68_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB78_16 +; RV64ZVE32F-NEXT: .LBB78_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB68_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB78_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7072,8 +8731,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB68_6 -; RV64ZVE32F-NEXT: .LBB68_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB78_6 +; RV64ZVE32F-NEXT: .LBB78_13: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7083,8 +8742,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB68_7 -; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB78_7 +; RV64ZVE32F-NEXT: .LBB78_14: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7094,9 +8753,9 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB68_8 -; RV64ZVE32F-NEXT: j .LBB68_9 -; RV64ZVE32F-NEXT: .LBB68_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB78_8 +; RV64ZVE32F-NEXT: j .LBB78_9 +; RV64ZVE32F-NEXT: .LBB78_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7105,8 +8764,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB68_11 -; RV64ZVE32F-NEXT: .LBB68_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB78_11 +; RV64ZVE32F-NEXT: .LBB78_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 @@ -7131,30 +8790,30 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV32-NEXT: vsoxei32.v v8, (a0), 
v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB69_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB79_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a2) -; RV64ZVE32F-NEXT: .LBB69_2: # %else +; RV64ZVE32F-NEXT: .LBB79_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB69_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB79_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 @@ -7164,23 +8823,23 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB69_4: # %else2 +; RV64ZVE32F-NEXT: .LBB79_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB69_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB79_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB69_13 -; RV64ZVE32F-NEXT: .LBB69_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB79_13 +; RV64ZVE32F-NEXT: .LBB79_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB69_14 -; RV64ZVE32F-NEXT: .LBB69_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB79_14 +; RV64ZVE32F-NEXT: .LBB79_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB69_9 -; RV64ZVE32F-NEXT: .LBB69_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB79_9 +; RV64ZVE32F-NEXT: .LBB79_8: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7190,17 +8849,17 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB69_9: # %else10 +; RV64ZVE32F-NEXT: .LBB79_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB69_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB79_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB69_16 -; RV64ZVE32F-NEXT: .LBB69_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, 
.LBB79_16 +; RV64ZVE32F-NEXT: .LBB79_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB69_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB79_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7209,8 +8868,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB69_6 -; RV64ZVE32F-NEXT: .LBB69_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB79_6 +; RV64ZVE32F-NEXT: .LBB79_13: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7220,8 +8879,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB69_7 -; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB79_7 +; RV64ZVE32F-NEXT: .LBB79_14: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7231,9 +8890,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB69_8 -; RV64ZVE32F-NEXT: j .LBB69_9 -; RV64ZVE32F-NEXT: .LBB69_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB79_8 +; RV64ZVE32F-NEXT: j .LBB79_9 +; RV64ZVE32F-NEXT: .LBB79_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7242,8 +8901,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB69_11 -; RV64ZVE32F-NEXT: .LBB69_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB79_11 +; RV64ZVE32F-NEXT: .LBB79_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 @@ -7270,21 +8929,21 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v11, v10 -; RV64-NEXT: vsll.vi v10, v11, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v11, v10 +; RV64V-NEXT: vsll.vi v10, v11, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB70_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB80_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ 
-7292,9 +8951,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a2) -; RV64ZVE32F-NEXT: .LBB70_2: # %else +; RV64ZVE32F-NEXT: .LBB80_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB70_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB80_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 @@ -7305,23 +8964,23 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB70_4: # %else2 +; RV64ZVE32F-NEXT: .LBB80_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB70_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB80_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB70_13 -; RV64ZVE32F-NEXT: .LBB70_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB80_13 +; RV64ZVE32F-NEXT: .LBB80_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB70_14 -; RV64ZVE32F-NEXT: .LBB70_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB80_14 +; RV64ZVE32F-NEXT: .LBB80_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB70_9 -; RV64ZVE32F-NEXT: .LBB70_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB80_9 +; RV64ZVE32F-NEXT: .LBB80_8: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7332,17 +8991,17 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB70_9: # %else10 +; RV64ZVE32F-NEXT: .LBB80_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB70_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB80_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB70_16 -; RV64ZVE32F-NEXT: .LBB70_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB80_16 +; RV64ZVE32F-NEXT: .LBB80_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB70_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7352,8 +9011,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB70_6 -; RV64ZVE32F-NEXT: .LBB70_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB80_6 +; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7364,8 +9023,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; 
RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB70_7 -; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB80_7 +; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -7376,9 +9035,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB70_8 -; RV64ZVE32F-NEXT: j .LBB70_9 -; RV64ZVE32F-NEXT: .LBB70_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB80_8 +; RV64ZVE32F-NEXT: j .LBB80_9 +; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7388,8 +9047,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB70_11 -; RV64ZVE32F-NEXT: .LBB70_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB80_11 +; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 @@ -7416,21 +9075,21 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i16_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i16_v8f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB71_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB81_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7438,9 +9097,9 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a2) -; RV64ZVE32F-NEXT: .LBB71_2: # %else +; RV64ZVE32F-NEXT: .LBB81_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB71_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB81_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 @@ -7450,23 +9109,23 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB71_4: # %else2 +; RV64ZVE32F-NEXT: .LBB81_4: # %else2 ; 
RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB71_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB81_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB71_13 -; RV64ZVE32F-NEXT: .LBB71_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB81_13 +; RV64ZVE32F-NEXT: .LBB81_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB71_14 -; RV64ZVE32F-NEXT: .LBB71_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB81_14 +; RV64ZVE32F-NEXT: .LBB81_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB71_9 -; RV64ZVE32F-NEXT: .LBB71_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB81_9 +; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7476,17 +9135,17 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB71_9: # %else10 +; RV64ZVE32F-NEXT: .LBB81_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB71_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB81_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB71_16 -; RV64ZVE32F-NEXT: .LBB71_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB81_16 +; RV64ZVE32F-NEXT: .LBB81_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB71_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7495,8 +9154,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB71_6 -; RV64ZVE32F-NEXT: .LBB71_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB81_6 +; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7506,8 +9165,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB71_7 -; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB81_7 +; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7517,9 +9176,9 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB71_8 -; RV64ZVE32F-NEXT: j .LBB71_9 -; RV64ZVE32F-NEXT: .LBB71_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB81_8 +; RV64ZVE32F-NEXT: j .LBB81_9 +; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11 ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7528,8 +9187,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB71_11 -; RV64ZVE32F-NEXT: .LBB71_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB81_11 +; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 @@ -7554,21 +9213,21 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB72_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB82_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7576,9 +9235,9 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a2) -; RV64ZVE32F-NEXT: .LBB72_2: # %else +; RV64ZVE32F-NEXT: .LBB82_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB72_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB82_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 @@ -7588,23 +9247,23 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB72_4: # %else2 +; RV64ZVE32F-NEXT: .LBB82_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB72_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB82_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB72_13 -; RV64ZVE32F-NEXT: .LBB72_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB82_13 +; RV64ZVE32F-NEXT: .LBB82_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB72_14 -; RV64ZVE32F-NEXT: .LBB72_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB82_14 +; RV64ZVE32F-NEXT: .LBB82_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB72_9 -; RV64ZVE32F-NEXT: .LBB72_8: # %cond.store9 +; RV64ZVE32F-NEXT: 
beqz a2, .LBB82_9 +; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7614,17 +9273,17 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB72_9: # %else10 +; RV64ZVE32F-NEXT: .LBB82_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB72_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB82_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB72_16 -; RV64ZVE32F-NEXT: .LBB72_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB82_16 +; RV64ZVE32F-NEXT: .LBB82_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB72_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7633,8 +9292,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB72_6 -; RV64ZVE32F-NEXT: .LBB72_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB82_6 +; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7644,8 +9303,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB72_7 -; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB82_7 +; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7655,9 +9314,9 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB72_8 -; RV64ZVE32F-NEXT: j .LBB72_9 -; RV64ZVE32F-NEXT: .LBB72_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB82_8 +; RV64ZVE32F-NEXT: j .LBB82_9 +; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7666,8 +9325,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB72_11 -; RV64ZVE32F-NEXT: .LBB72_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB82_11 +; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 @@ -7693,13 +9352,13 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f32: -; RV64: # %bb.0: 
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v12, v10 -; RV64-NEXT: vsll.vi v10, v12, 2 -; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v12, v10 +; RV64V-NEXT: vsll.vi v10, v12, 2 +; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32: ; RV64ZVE32F: # %bb.0: @@ -7708,7 +9367,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: addiw a1, a1, -1 -; RV64ZVE32F-NEXT: beqz a3, .LBB73_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB83_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 @@ -7717,9 +9376,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a3) -; RV64ZVE32F-NEXT: .LBB73_2: # %else +; RV64ZVE32F-NEXT: .LBB83_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB73_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB83_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 @@ -7730,23 +9389,23 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v11, (a3) -; RV64ZVE32F-NEXT: .LBB73_4: # %else2 +; RV64ZVE32F-NEXT: .LBB83_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB73_12 +; RV64ZVE32F-NEXT: bnez a3, .LBB83_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB73_13 -; RV64ZVE32F-NEXT: .LBB73_6: # %else6 +; RV64ZVE32F-NEXT: bnez a3, .LBB83_13 +; RV64ZVE32F-NEXT: .LBB83_6: # %else6 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB73_14 -; RV64ZVE32F-NEXT: .LBB73_7: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB83_14 +; RV64ZVE32F-NEXT: .LBB83_7: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB73_9 -; RV64ZVE32F-NEXT: .LBB73_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a3, .LBB83_9 +; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 @@ -7757,17 +9416,17 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) -; RV64ZVE32F-NEXT: .LBB73_9: # %else10 +; RV64ZVE32F-NEXT: .LBB83_9: # %else10 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB73_15 +; RV64ZVE32F-NEXT: bnez a3, .LBB83_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: bnez a2, .LBB73_16 -; RV64ZVE32F-NEXT: .LBB73_11: # %else14 +; 
RV64ZVE32F-NEXT: bnez a2, .LBB83_16 +; RV64ZVE32F-NEXT: .LBB83_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB73_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 @@ -7777,8 +9436,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB73_6 -; RV64ZVE32F-NEXT: .LBB73_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a3, .LBB83_6 +; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 @@ -7789,8 +9448,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB73_7 -; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a3, .LBB83_7 +; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -7801,9 +9460,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB73_8 -; RV64ZVE32F-NEXT: j .LBB73_9 -; RV64ZVE32F-NEXT: .LBB73_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a3, .LBB83_8 +; RV64ZVE32F-NEXT: j .LBB83_9 +; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 @@ -7813,8 +9472,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB73_11 -; RV64ZVE32F-NEXT: .LBB73_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a2, .LBB83_11 +; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7840,30 +9499,30 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8f32: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf2 v12, v10 +; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vse32.v v8, (a2) -; RV64ZVE32F-NEXT: .LBB74_2: # %else +; RV64ZVE32F-NEXT: .LBB84_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 @@ -7872,23 +9531,23 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB74_4: # %else2 +; RV64ZVE32F-NEXT: .LBB84_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_13 -; RV64ZVE32F-NEXT: .LBB74_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_13 +; RV64ZVE32F-NEXT: .LBB84_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_14 -; RV64ZVE32F-NEXT: .LBB74_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 +; RV64ZVE32F-NEXT: .LBB84_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_9 -; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_9 +; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7898,17 +9557,17 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) -; RV64ZVE32F-NEXT: .LBB74_9: # %else10 +; RV64ZVE32F-NEXT: .LBB84_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB74_16 -; RV64ZVE32F-NEXT: .LBB74_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB84_16 +; RV64ZVE32F-NEXT: .LBB84_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB74_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7916,8 +9575,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v11, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB74_6 -; RV64ZVE32F-NEXT: .LBB74_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_6 +; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -7926,8 +9585,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 -; RV64ZVE32F-NEXT: .LBB74_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_7 +; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7936,9 +9595,9 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB74_8 -; RV64ZVE32F-NEXT: j .LBB74_9 -; RV64ZVE32F-NEXT: .LBB74_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_8 +; RV64ZVE32F-NEXT: j .LBB84_9 +; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7947,8 +9606,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB74_11 -; RV64ZVE32F-NEXT: .LBB74_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB84_11 +; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 @@ -7973,32 +9632,32 @@ define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v1f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v1f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v1f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vfirst.m a0, v0 -; RV32ZVE32F-NEXT: bnez a0, .LBB75_2 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_2 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) -; RV32ZVE32F-NEXT: .LBB75_2: # %else +; RV32ZVE32F-NEXT: .LBB85_2: # %else ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v1f64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vfirst.m a1, v0 -; RV64ZVE32F-NEXT: bnez a1, .LBB75_2 +; RV64ZVE32F-NEXT: bnez a1, .LBB85_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) -; RV64ZVE32F-NEXT: .LBB75_2: # %else +; RV64ZVE32F-NEXT: .LBB85_2: # %else ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m) ret void @@ -8013,30 +9672,30 @@ define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v2f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v2f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v2f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 ; RV32ZVE32F-NEXT: andi a1, a0, 
1 -; RV32ZVE32F-NEXT: bnez a1, .LBB76_3 +; RV32ZVE32F-NEXT: bnez a1, .LBB86_3 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a0, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB76_4 -; RV32ZVE32F-NEXT: .LBB76_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_4 +; RV32ZVE32F-NEXT: .LBB86_2: # %else2 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB76_3: # %cond.store +; RV32ZVE32F-NEXT: .LBB86_3: # %cond.store ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a0, a0, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB76_2 -; RV32ZVE32F-NEXT: .LBB76_4: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_2 +; RV32ZVE32F-NEXT: .LBB86_4: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -8048,17 +9707,17 @@ define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: bnez a3, .LBB76_3 +; RV64ZVE32F-NEXT: bnez a3, .LBB86_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB76_4 -; RV64ZVE32F-NEXT: .LBB76_2: # %else2 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_4 +; RV64ZVE32F-NEXT: .LBB86_2: # %else2 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB76_3: # %cond.store +; RV64ZVE32F-NEXT: .LBB86_3: # %cond.store ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: andi a2, a2, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB76_2 -; RV64ZVE32F-NEXT: .LBB76_4: # %cond.store1 +; RV64ZVE32F-NEXT: beqz a2, .LBB86_2 +; RV64ZVE32F-NEXT: .LBB86_4: # %cond.store1 ; RV64ZVE32F-NEXT: fsd fa1, 0(a1) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m) @@ -8074,50 +9733,50 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v4f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v4f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 ; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB77_5 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_5 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB77_6 -; RV32ZVE32F-NEXT: .LBB77_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_6 +; RV32ZVE32F-NEXT: .LBB87_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB77_7 -; RV32ZVE32F-NEXT: .LBB77_3: # %else4 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_7 +; RV32ZVE32F-NEXT: .LBB87_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a0, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB77_8 -; RV32ZVE32F-NEXT: .LBB77_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_8 +; RV32ZVE32F-NEXT: .LBB87_4: # %else6 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB77_5: # %cond.store +; RV32ZVE32F-NEXT: .LBB87_5: # %cond.store ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB77_2 -; 
RV32ZVE32F-NEXT: .LBB77_6: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 +; RV32ZVE32F-NEXT: .LBB87_6: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB77_3 -; RV32ZVE32F-NEXT: .LBB77_7: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_3 +; RV32ZVE32F-NEXT: .LBB87_7: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a0, a0, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB77_4 -; RV32ZVE32F-NEXT: .LBB77_8: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_4 +; RV32ZVE32F-NEXT: .LBB87_8: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -8132,32 +9791,32 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi a5, a3, 1 -; RV64ZVE32F-NEXT: bnez a5, .LBB77_5 +; RV64ZVE32F-NEXT: bnez a5, .LBB87_5 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB77_6 -; RV64ZVE32F-NEXT: .LBB77_2: # %else2 +; RV64ZVE32F-NEXT: bnez a0, .LBB87_6 +; RV64ZVE32F-NEXT: .LBB87_2: # %else2 ; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB77_7 -; RV64ZVE32F-NEXT: .LBB77_3: # %else4 +; RV64ZVE32F-NEXT: bnez a0, .LBB87_7 +; RV64ZVE32F-NEXT: .LBB87_3: # %else4 ; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB77_8 -; RV64ZVE32F-NEXT: .LBB77_4: # %else6 +; RV64ZVE32F-NEXT: bnez a3, .LBB87_8 +; RV64ZVE32F-NEXT: .LBB87_4: # %else6 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB77_5: # %cond.store +; RV64ZVE32F-NEXT: .LBB87_5: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB77_2 -; RV64ZVE32F-NEXT: .LBB77_6: # %cond.store1 +; RV64ZVE32F-NEXT: beqz a0, .LBB87_2 +; RV64ZVE32F-NEXT: .LBB87_6: # %cond.store1 ; RV64ZVE32F-NEXT: fsd fa1, 0(a4) ; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB77_3 -; RV64ZVE32F-NEXT: .LBB77_7: # %cond.store3 +; RV64ZVE32F-NEXT: beqz a0, .LBB87_3 +; RV64ZVE32F-NEXT: .LBB87_7: # %cond.store3 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB77_4 -; RV64ZVE32F-NEXT: .LBB77_8: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a3, .LBB87_4 +; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store5 ; RV64ZVE32F-NEXT: fsd fa3, 0(a1) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m) @@ -8171,11 +9830,11 @@ define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) { ; RV32V-NEXT: vsoxei32.v v8, (zero), v10 ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_truemask_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v10 -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_truemask_v4f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v10 +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_truemask_v4f64: ; RV32ZVE32F: # %bb.0: @@ -8225,90 +9884,90 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; 
RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_v8f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 ; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB80_9 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB80_10 -; RV32ZVE32F-NEXT: .LBB80_2: # %else2 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_10 +; RV32ZVE32F-NEXT: .LBB90_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB80_11 -; RV32ZVE32F-NEXT: .LBB80_3: # %else4 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 +; RV32ZVE32F-NEXT: .LBB90_3: # %else4 ; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB80_12 -; RV32ZVE32F-NEXT: .LBB80_4: # %else6 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_12 +; RV32ZVE32F-NEXT: .LBB90_4: # %else6 ; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB80_13 -; RV32ZVE32F-NEXT: .LBB80_5: # %else8 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 +; RV32ZVE32F-NEXT: .LBB90_5: # %else8 ; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB80_14 -; RV32ZVE32F-NEXT: .LBB80_6: # %else10 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 +; RV32ZVE32F-NEXT: .LBB90_6: # %else10 ; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB80_15 -; RV32ZVE32F-NEXT: .LBB80_7: # %else12 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_15 +; RV32ZVE32F-NEXT: .LBB90_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a0, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB80_16 -; RV32ZVE32F-NEXT: .LBB80_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_16 +; RV32ZVE32F-NEXT: .LBB90_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB80_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB80_2 -; RV32ZVE32F-NEXT: .LBB80_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 +; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB80_3 -; RV32ZVE32F-NEXT: .LBB80_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_3 +; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB80_4 -; RV32ZVE32F-NEXT: .LBB80_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_4 +; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB80_5 -; RV32ZVE32F-NEXT: .LBB80_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_5 +; RV32ZVE32F-NEXT: .LBB90_13: 
# %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB80_6 -; RV32ZVE32F-NEXT: .LBB80_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_6 +; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB80_7 -; RV32ZVE32F-NEXT: .LBB80_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_7 +; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a0, a0, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB80_8 -; RV32ZVE32F-NEXT: .LBB80_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_8 +; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -8327,60 +9986,60 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 ; RV64ZVE32F-NEXT: andi t1, a4, 1 -; RV64ZVE32F-NEXT: bnez t1, .LBB80_9 +; RV64ZVE32F-NEXT: bnez t1, .LBB90_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a4, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB80_10 -; RV64ZVE32F-NEXT: .LBB80_2: # %else2 +; RV64ZVE32F-NEXT: bnez a0, .LBB90_10 +; RV64ZVE32F-NEXT: .LBB90_2: # %else2 ; RV64ZVE32F-NEXT: andi a0, a4, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB80_11 -; RV64ZVE32F-NEXT: .LBB80_3: # %else4 +; RV64ZVE32F-NEXT: bnez a0, .LBB90_11 +; RV64ZVE32F-NEXT: .LBB90_3: # %else4 ; RV64ZVE32F-NEXT: andi a0, a4, 8 -; RV64ZVE32F-NEXT: bnez a0, .LBB80_12 -; RV64ZVE32F-NEXT: .LBB80_4: # %else6 +; RV64ZVE32F-NEXT: bnez a0, .LBB90_12 +; RV64ZVE32F-NEXT: .LBB90_4: # %else6 ; RV64ZVE32F-NEXT: andi a0, a4, 16 -; RV64ZVE32F-NEXT: bnez a0, .LBB80_13 -; RV64ZVE32F-NEXT: .LBB80_5: # %else8 +; RV64ZVE32F-NEXT: bnez a0, .LBB90_13 +; RV64ZVE32F-NEXT: .LBB90_5: # %else8 ; RV64ZVE32F-NEXT: andi a0, a4, 32 -; RV64ZVE32F-NEXT: bnez a0, .LBB80_14 -; RV64ZVE32F-NEXT: .LBB80_6: # %else10 +; RV64ZVE32F-NEXT: bnez a0, .LBB90_14 +; RV64ZVE32F-NEXT: .LBB90_6: # %else10 ; RV64ZVE32F-NEXT: andi a0, a4, 64 -; RV64ZVE32F-NEXT: bnez a0, .LBB80_15 -; RV64ZVE32F-NEXT: .LBB80_7: # %else12 +; RV64ZVE32F-NEXT: bnez a0, .LBB90_15 +; RV64ZVE32F-NEXT: .LBB90_7: # %else12 ; RV64ZVE32F-NEXT: andi a0, a4, -128 -; RV64ZVE32F-NEXT: bnez a0, .LBB80_16 -; RV64ZVE32F-NEXT: .LBB80_8: # %else14 +; RV64ZVE32F-NEXT: bnez a0, .LBB90_16 +; RV64ZVE32F-NEXT: .LBB90_8: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB80_9: # %cond.store +; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) ; RV64ZVE32F-NEXT: andi a0, a4, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB80_2 -; RV64ZVE32F-NEXT: .LBB80_10: # %cond.store1 +; RV64ZVE32F-NEXT: beqz a0, .LBB90_2 +; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1 ; RV64ZVE32F-NEXT: fsd fa1, 0(t0) ; RV64ZVE32F-NEXT: andi a0, a4, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB80_3 -; RV64ZVE32F-NEXT: .LBB80_11: # %cond.store3 +; RV64ZVE32F-NEXT: beqz a0, .LBB90_3 +; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3 ; 
RV64ZVE32F-NEXT: fsd fa2, 0(a7) ; RV64ZVE32F-NEXT: andi a0, a4, 8 -; RV64ZVE32F-NEXT: beqz a0, .LBB80_4 -; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a0, .LBB90_4 +; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5 ; RV64ZVE32F-NEXT: fsd fa3, 0(a6) ; RV64ZVE32F-NEXT: andi a0, a4, 16 -; RV64ZVE32F-NEXT: beqz a0, .LBB80_5 -; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a0, .LBB90_5 +; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7 ; RV64ZVE32F-NEXT: fsd fa4, 0(a5) ; RV64ZVE32F-NEXT: andi a0, a4, 32 -; RV64ZVE32F-NEXT: beqz a0, .LBB80_6 -; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a0, .LBB90_6 +; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9 ; RV64ZVE32F-NEXT: fsd fa5, 0(a3) ; RV64ZVE32F-NEXT: andi a0, a4, 64 -; RV64ZVE32F-NEXT: beqz a0, .LBB80_7 -; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11 +; RV64ZVE32F-NEXT: beqz a0, .LBB90_7 +; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a0, a4, -128 -; RV64ZVE32F-NEXT: beqz a0, .LBB80_8 -; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a0, .LBB90_8 +; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13 ; RV64ZVE32F-NEXT: fsd fa7, 0(a1) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) @@ -8397,13 +10056,13 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i8_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i8_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64: ; RV32ZVE32F: # %bb.0: @@ -8415,78 +10074,78 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB81_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB91_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB81_10 -; RV32ZVE32F-NEXT: .LBB81_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_10 +; RV32ZVE32F-NEXT: .LBB91_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB81_11 -; RV32ZVE32F-NEXT: .LBB81_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_11 +; RV32ZVE32F-NEXT: .LBB91_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB81_12 -; RV32ZVE32F-NEXT: .LBB81_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_12 +; RV32ZVE32F-NEXT: .LBB91_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB81_13 -; RV32ZVE32F-NEXT: .LBB81_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_13 +; RV32ZVE32F-NEXT: .LBB91_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB81_14 -; RV32ZVE32F-NEXT: .LBB81_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_14 +; RV32ZVE32F-NEXT: .LBB91_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB81_15 -; RV32ZVE32F-NEXT: .LBB81_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_15 +; RV32ZVE32F-NEXT: .LBB91_7: 
# %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB81_16 -; RV32ZVE32F-NEXT: .LBB81_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_16 +; RV32ZVE32F-NEXT: .LBB91_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB91_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB81_2 -; RV32ZVE32F-NEXT: .LBB81_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB91_2 +; RV32ZVE32F-NEXT: .LBB91_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB81_3 -; RV32ZVE32F-NEXT: .LBB81_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB91_3 +; RV32ZVE32F-NEXT: .LBB91_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB81_4 -; RV32ZVE32F-NEXT: .LBB81_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB91_4 +; RV32ZVE32F-NEXT: .LBB91_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB81_5 -; RV32ZVE32F-NEXT: .LBB81_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB91_5 +; RV32ZVE32F-NEXT: .LBB91_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB81_6 -; RV32ZVE32F-NEXT: .LBB81_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB91_6 +; RV32ZVE32F-NEXT: .LBB91_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB81_7 -; RV32ZVE32F-NEXT: .LBB81_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB91_7 +; RV32ZVE32F-NEXT: .LBB91_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB81_8 -; RV32ZVE32F-NEXT: .LBB81_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB91_8 +; RV32ZVE32F-NEXT: .LBB91_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -8498,15 +10157,15 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB81_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB91_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) -; RV64ZVE32F-NEXT: .LBB81_2: # %else +; RV64ZVE32F-NEXT: .LBB91_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, 
.LBB81_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB91_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8514,68 +10173,68 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) -; RV64ZVE32F-NEXT: .LBB81_4: # %else2 +; RV64ZVE32F-NEXT: .LBB91_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB81_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB91_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB81_13 -; RV64ZVE32F-NEXT: .LBB81_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB91_13 +; RV64ZVE32F-NEXT: .LBB91_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB81_14 -; RV64ZVE32F-NEXT: .LBB81_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB91_14 +; RV64ZVE32F-NEXT: .LBB91_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB81_9 -; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB91_9 +; RV64ZVE32F-NEXT: .LBB91_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) -; RV64ZVE32F-NEXT: .LBB81_9: # %else10 +; RV64ZVE32F-NEXT: .LBB91_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB81_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB91_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB81_16 -; RV64ZVE32F-NEXT: .LBB81_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB91_16 +; RV64ZVE32F-NEXT: .LBB91_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB91_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB81_6 -; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB91_6 +; RV64ZVE32F-NEXT: .LBB91_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB81_7 -; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB91_7 +; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB81_8 -; RV64ZVE32F-NEXT: j .LBB81_9 -; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB91_8 +; RV64ZVE32F-NEXT: j .LBB91_9 +; RV64ZVE32F-NEXT: .LBB91_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB81_11 -; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13 +; 
RV64ZVE32F-NEXT: beqz a1, .LBB91_11 +; RV64ZVE32F-NEXT: .LBB91_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -8597,13 +10256,13 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64: ; RV32ZVE32F: # %bb.0: @@ -8615,78 +10274,78 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB82_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB92_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB82_10 -; RV32ZVE32F-NEXT: .LBB82_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_10 +; RV32ZVE32F-NEXT: .LBB92_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB82_11 -; RV32ZVE32F-NEXT: .LBB82_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_11 +; RV32ZVE32F-NEXT: .LBB92_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB82_12 -; RV32ZVE32F-NEXT: .LBB82_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_12 +; RV32ZVE32F-NEXT: .LBB92_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB82_13 -; RV32ZVE32F-NEXT: .LBB82_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_13 +; RV32ZVE32F-NEXT: .LBB92_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB82_14 -; RV32ZVE32F-NEXT: .LBB82_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_14 +; RV32ZVE32F-NEXT: .LBB92_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB82_15 -; RV32ZVE32F-NEXT: .LBB82_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_15 +; RV32ZVE32F-NEXT: .LBB92_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB82_16 -; RV32ZVE32F-NEXT: .LBB82_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_16 +; RV32ZVE32F-NEXT: .LBB92_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB92_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB82_2 -; RV32ZVE32F-NEXT: .LBB82_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB92_2 +; RV32ZVE32F-NEXT: .LBB92_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB82_3 -; RV32ZVE32F-NEXT: .LBB82_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB92_3 +; RV32ZVE32F-NEXT: .LBB92_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; 
RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB82_4 -; RV32ZVE32F-NEXT: .LBB82_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB92_4 +; RV32ZVE32F-NEXT: .LBB92_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB82_5 -; RV32ZVE32F-NEXT: .LBB82_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB92_5 +; RV32ZVE32F-NEXT: .LBB92_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB82_6 -; RV32ZVE32F-NEXT: .LBB82_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB92_6 +; RV32ZVE32F-NEXT: .LBB92_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB82_7 -; RV32ZVE32F-NEXT: .LBB82_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB92_7 +; RV32ZVE32F-NEXT: .LBB92_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB82_8 -; RV32ZVE32F-NEXT: .LBB82_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB92_8 +; RV32ZVE32F-NEXT: .LBB92_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -8698,15 +10357,15 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB82_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB92_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) -; RV64ZVE32F-NEXT: .LBB82_2: # %else +; RV64ZVE32F-NEXT: .LBB92_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB82_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB92_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8714,68 +10373,68 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) -; RV64ZVE32F-NEXT: .LBB82_4: # %else2 +; RV64ZVE32F-NEXT: .LBB92_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB82_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB92_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB82_13 -; RV64ZVE32F-NEXT: .LBB82_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB92_13 +; RV64ZVE32F-NEXT: .LBB92_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB82_14 -; RV64ZVE32F-NEXT: .LBB82_7: # %else8 +; 
RV64ZVE32F-NEXT: bnez a2, .LBB92_14 +; RV64ZVE32F-NEXT: .LBB92_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB82_9 -; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB92_9 +; RV64ZVE32F-NEXT: .LBB92_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) -; RV64ZVE32F-NEXT: .LBB82_9: # %else10 +; RV64ZVE32F-NEXT: .LBB92_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB82_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB92_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB82_16 -; RV64ZVE32F-NEXT: .LBB82_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB92_16 +; RV64ZVE32F-NEXT: .LBB92_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB92_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB82_6 -; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB92_6 +; RV64ZVE32F-NEXT: .LBB92_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB82_7 -; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB92_7 +; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB82_8 -; RV64ZVE32F-NEXT: j .LBB82_9 -; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB92_8 +; RV64ZVE32F-NEXT: j .LBB92_9 +; RV64ZVE32F-NEXT: .LBB92_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB82_11 -; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB92_11 +; RV64ZVE32F-NEXT: .LBB92_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -8798,14 +10457,14 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v13, v12 -; RV64-NEXT: vsll.vi v12, v13, 3 -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v13, v12 +; RV64V-NEXT: vsll.vi v12, v13, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV32ZVE32F: # %bb.0: @@ -8817,78 +10476,78 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> 
%val, ptr %base, <8 x ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB83_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB93_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB83_10 -; RV32ZVE32F-NEXT: .LBB83_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB93_10 +; RV32ZVE32F-NEXT: .LBB93_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB83_11 -; RV32ZVE32F-NEXT: .LBB83_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB93_11 +; RV32ZVE32F-NEXT: .LBB93_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB83_12 -; RV32ZVE32F-NEXT: .LBB83_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB93_12 +; RV32ZVE32F-NEXT: .LBB93_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB83_13 -; RV32ZVE32F-NEXT: .LBB83_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB93_13 +; RV32ZVE32F-NEXT: .LBB93_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB83_14 -; RV32ZVE32F-NEXT: .LBB83_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB93_14 +; RV32ZVE32F-NEXT: .LBB93_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB83_15 -; RV32ZVE32F-NEXT: .LBB83_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB93_15 +; RV32ZVE32F-NEXT: .LBB93_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB83_16 -; RV32ZVE32F-NEXT: .LBB83_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB93_16 +; RV32ZVE32F-NEXT: .LBB93_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB93_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB83_2 -; RV32ZVE32F-NEXT: .LBB83_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB93_2 +; RV32ZVE32F-NEXT: .LBB93_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB83_3 -; RV32ZVE32F-NEXT: .LBB83_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB93_3 +; RV32ZVE32F-NEXT: .LBB93_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB83_4 -; RV32ZVE32F-NEXT: .LBB83_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB93_4 +; RV32ZVE32F-NEXT: .LBB93_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB83_5 -; RV32ZVE32F-NEXT: .LBB83_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB93_5 +; RV32ZVE32F-NEXT: .LBB93_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB83_6 -; RV32ZVE32F-NEXT: .LBB83_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB93_6 +; RV32ZVE32F-NEXT: .LBB93_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 
; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB83_7 -; RV32ZVE32F-NEXT: .LBB83_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB93_7 +; RV32ZVE32F-NEXT: .LBB93_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB83_8 -; RV32ZVE32F-NEXT: .LBB83_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB93_8 +; RV32ZVE32F-NEXT: .LBB93_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -8900,16 +10559,16 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB83_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB93_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) -; RV64ZVE32F-NEXT: .LBB83_2: # %else +; RV64ZVE32F-NEXT: .LBB93_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB83_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB93_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -8918,47 +10577,47 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) -; RV64ZVE32F-NEXT: .LBB83_4: # %else2 +; RV64ZVE32F-NEXT: .LBB93_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB83_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB93_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB83_13 -; RV64ZVE32F-NEXT: .LBB83_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB93_13 +; RV64ZVE32F-NEXT: .LBB93_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB83_14 -; RV64ZVE32F-NEXT: .LBB83_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB93_14 +; RV64ZVE32F-NEXT: .LBB93_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB83_9 -; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB93_9 +; RV64ZVE32F-NEXT: .LBB93_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) -; RV64ZVE32F-NEXT: .LBB83_9: # %else10 +; RV64ZVE32F-NEXT: .LBB93_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB83_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB93_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB83_16 -; RV64ZVE32F-NEXT: .LBB83_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB93_16 +; RV64ZVE32F-NEXT: .LBB93_11: # %else14 ; RV64ZVE32F-NEXT: ret -; 
RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB93_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB83_6 -; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB93_6 +; RV64ZVE32F-NEXT: .LBB93_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -8966,25 +10625,25 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB83_7 -; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB93_7 +; RV64ZVE32F-NEXT: .LBB93_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB83_8 -; RV64ZVE32F-NEXT: j .LBB83_9 -; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB93_8 +; RV64ZVE32F-NEXT: j .LBB93_9 +; RV64ZVE32F-NEXT: .LBB93_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB83_11 -; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB93_11 +; RV64ZVE32F-NEXT: .LBB93_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: andi a1, a1, 255 @@ -9008,13 +10667,13 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i16_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i16_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: @@ -9026,78 +10685,78 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB84_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB94_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB84_10 -; RV32ZVE32F-NEXT: .LBB84_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB94_10 +; RV32ZVE32F-NEXT: .LBB94_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB84_11 -; RV32ZVE32F-NEXT: .LBB84_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB94_11 +; RV32ZVE32F-NEXT: .LBB94_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB84_12 -; RV32ZVE32F-NEXT: .LBB84_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB94_12 +; RV32ZVE32F-NEXT: .LBB94_4: # %else6 ; RV32ZVE32F-NEXT: 
andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB84_13 -; RV32ZVE32F-NEXT: .LBB84_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB94_13 +; RV32ZVE32F-NEXT: .LBB94_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB84_14 -; RV32ZVE32F-NEXT: .LBB84_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB94_14 +; RV32ZVE32F-NEXT: .LBB94_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB84_15 -; RV32ZVE32F-NEXT: .LBB84_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB94_15 +; RV32ZVE32F-NEXT: .LBB94_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB84_16 -; RV32ZVE32F-NEXT: .LBB84_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB94_16 +; RV32ZVE32F-NEXT: .LBB94_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB94_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB84_2 -; RV32ZVE32F-NEXT: .LBB84_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB94_2 +; RV32ZVE32F-NEXT: .LBB94_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB84_3 -; RV32ZVE32F-NEXT: .LBB84_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB94_3 +; RV32ZVE32F-NEXT: .LBB94_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB84_4 -; RV32ZVE32F-NEXT: .LBB84_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB94_4 +; RV32ZVE32F-NEXT: .LBB94_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB84_5 -; RV32ZVE32F-NEXT: .LBB84_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB94_5 +; RV32ZVE32F-NEXT: .LBB94_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB84_6 -; RV32ZVE32F-NEXT: .LBB84_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB94_6 +; RV32ZVE32F-NEXT: .LBB94_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB84_7 -; RV32ZVE32F-NEXT: .LBB84_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB94_7 +; RV32ZVE32F-NEXT: .LBB94_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB84_8 -; RV32ZVE32F-NEXT: .LBB84_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB94_8 +; RV32ZVE32F-NEXT: .LBB94_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -9109,16 +10768,16 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x 
double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB84_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB94_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) -; RV64ZVE32F-NEXT: .LBB84_2: # %else +; RV64ZVE32F-NEXT: .LBB94_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB84_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB94_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -9126,68 +10785,68 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) -; RV64ZVE32F-NEXT: .LBB84_4: # %else2 +; RV64ZVE32F-NEXT: .LBB94_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB94_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_13 -; RV64ZVE32F-NEXT: .LBB84_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB94_13 +; RV64ZVE32F-NEXT: .LBB94_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 -; RV64ZVE32F-NEXT: .LBB84_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB94_14 +; RV64ZVE32F-NEXT: .LBB94_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB84_9 -; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB94_9 +; RV64ZVE32F-NEXT: .LBB94_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) -; RV64ZVE32F-NEXT: .LBB84_9: # %else10 +; RV64ZVE32F-NEXT: .LBB94_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB94_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB84_16 -; RV64ZVE32F-NEXT: .LBB84_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB94_16 +; RV64ZVE32F-NEXT: .LBB94_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB94_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB84_6 -; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB94_6 +; RV64ZVE32F-NEXT: .LBB94_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB84_7 -; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB94_7 +; RV64ZVE32F-NEXT: .LBB94_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; 
RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_8 -; RV64ZVE32F-NEXT: j .LBB84_9 -; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB94_8 +; RV64ZVE32F-NEXT: j .LBB94_9 +; RV64ZVE32F-NEXT: .LBB94_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB84_11 -; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB94_11 +; RV64ZVE32F-NEXT: .LBB94_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -9209,13 +10868,13 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf4 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: @@ -9227,78 +10886,78 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB85_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB95_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB85_10 -; RV32ZVE32F-NEXT: .LBB85_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB95_10 +; RV32ZVE32F-NEXT: .LBB95_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB85_11 -; RV32ZVE32F-NEXT: .LBB85_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB95_11 +; RV32ZVE32F-NEXT: .LBB95_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB85_12 -; RV32ZVE32F-NEXT: .LBB85_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB95_12 +; RV32ZVE32F-NEXT: .LBB95_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB85_13 -; RV32ZVE32F-NEXT: .LBB85_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB95_13 +; RV32ZVE32F-NEXT: .LBB95_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB85_14 -; RV32ZVE32F-NEXT: .LBB85_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB95_14 +; RV32ZVE32F-NEXT: .LBB95_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB85_15 -; RV32ZVE32F-NEXT: .LBB85_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB95_15 +; RV32ZVE32F-NEXT: .LBB95_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB85_16 -; RV32ZVE32F-NEXT: .LBB85_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB95_16 +; RV32ZVE32F-NEXT: .LBB95_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB95_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB85_2 -; RV32ZVE32F-NEXT: .LBB85_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, 
.LBB95_2 +; RV32ZVE32F-NEXT: .LBB95_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB85_3 -; RV32ZVE32F-NEXT: .LBB85_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB95_3 +; RV32ZVE32F-NEXT: .LBB95_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB85_4 -; RV32ZVE32F-NEXT: .LBB85_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB95_4 +; RV32ZVE32F-NEXT: .LBB95_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB85_5 -; RV32ZVE32F-NEXT: .LBB85_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB95_5 +; RV32ZVE32F-NEXT: .LBB95_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB85_6 -; RV32ZVE32F-NEXT: .LBB85_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB95_6 +; RV32ZVE32F-NEXT: .LBB95_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB85_7 -; RV32ZVE32F-NEXT: .LBB85_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB95_7 +; RV32ZVE32F-NEXT: .LBB95_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB85_8 -; RV32ZVE32F-NEXT: .LBB85_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB95_8 +; RV32ZVE32F-NEXT: .LBB95_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -9310,16 +10969,16 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB85_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB95_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) -; RV64ZVE32F-NEXT: .LBB85_2: # %else +; RV64ZVE32F-NEXT: .LBB95_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB85_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB95_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -9327,68 +10986,68 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) -; RV64ZVE32F-NEXT: .LBB85_4: # %else2 +; RV64ZVE32F-NEXT: .LBB95_4: # %else2 ; 
RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB95_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_13 -; RV64ZVE32F-NEXT: .LBB85_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB95_13 +; RV64ZVE32F-NEXT: .LBB95_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_14 -; RV64ZVE32F-NEXT: .LBB85_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB95_14 +; RV64ZVE32F-NEXT: .LBB95_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB85_9 -; RV64ZVE32F-NEXT: .LBB85_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB95_9 +; RV64ZVE32F-NEXT: .LBB95_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) -; RV64ZVE32F-NEXT: .LBB85_9: # %else10 +; RV64ZVE32F-NEXT: .LBB95_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB95_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB85_16 -; RV64ZVE32F-NEXT: .LBB85_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB95_16 +; RV64ZVE32F-NEXT: .LBB95_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB85_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB95_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB85_6 -; RV64ZVE32F-NEXT: .LBB85_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB95_6 +; RV64ZVE32F-NEXT: .LBB95_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB85_7 -; RV64ZVE32F-NEXT: .LBB85_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB95_7 +; RV64ZVE32F-NEXT: .LBB95_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_8 -; RV64ZVE32F-NEXT: j .LBB85_9 -; RV64ZVE32F-NEXT: .LBB85_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB95_8 +; RV64ZVE32F-NEXT: j .LBB95_9 +; RV64ZVE32F-NEXT: .LBB95_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB85_11 -; RV64ZVE32F-NEXT: .LBB85_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB95_11 +; RV64ZVE32F-NEXT: .LBB95_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -9411,14 +11070,14 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma 
-; RV64-NEXT: vzext.vf2 v14, v12 -; RV64-NEXT: vsll.vi v12, v14, 3 -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v14, v12 +; RV64V-NEXT: vsll.vi v12, v14, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: @@ -9430,78 +11089,78 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB86_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB96_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB86_10 -; RV32ZVE32F-NEXT: .LBB86_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB96_10 +; RV32ZVE32F-NEXT: .LBB96_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB86_11 -; RV32ZVE32F-NEXT: .LBB86_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB96_11 +; RV32ZVE32F-NEXT: .LBB96_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB86_12 -; RV32ZVE32F-NEXT: .LBB86_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB96_12 +; RV32ZVE32F-NEXT: .LBB96_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB86_13 -; RV32ZVE32F-NEXT: .LBB86_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB96_13 +; RV32ZVE32F-NEXT: .LBB96_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB86_14 -; RV32ZVE32F-NEXT: .LBB86_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB96_14 +; RV32ZVE32F-NEXT: .LBB96_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB86_15 -; RV32ZVE32F-NEXT: .LBB86_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB96_15 +; RV32ZVE32F-NEXT: .LBB96_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB86_16 -; RV32ZVE32F-NEXT: .LBB86_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB96_16 +; RV32ZVE32F-NEXT: .LBB96_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB96_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB86_2 -; RV32ZVE32F-NEXT: .LBB86_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB96_2 +; RV32ZVE32F-NEXT: .LBB96_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB86_3 -; RV32ZVE32F-NEXT: .LBB86_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB96_3 +; RV32ZVE32F-NEXT: .LBB96_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB86_4 -; RV32ZVE32F-NEXT: .LBB86_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB96_4 +; RV32ZVE32F-NEXT: .LBB96_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: 
andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB86_5 -; RV32ZVE32F-NEXT: .LBB86_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB96_5 +; RV32ZVE32F-NEXT: .LBB96_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB86_6 -; RV32ZVE32F-NEXT: .LBB86_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB96_6 +; RV32ZVE32F-NEXT: .LBB96_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB86_7 -; RV32ZVE32F-NEXT: .LBB86_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB96_7 +; RV32ZVE32F-NEXT: .LBB96_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB86_8 -; RV32ZVE32F-NEXT: .LBB86_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB96_8 +; RV32ZVE32F-NEXT: .LBB96_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -9515,7 +11174,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: addiw a1, a1, -1 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_2 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -9523,9 +11182,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: fsd fa0, 0(a3) -; RV64ZVE32F-NEXT: .LBB86_2: # %else +; RV64ZVE32F-NEXT: .LBB96_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_4 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 @@ -9534,47 +11193,47 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: fsd fa1, 0(a3) -; RV64ZVE32F-NEXT: .LBB86_4: # %else2 +; RV64ZVE32F-NEXT: .LBB96_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_12 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_13 -; RV64ZVE32F-NEXT: .LBB86_6: # %else6 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_13 +; RV64ZVE32F-NEXT: .LBB96_6: # %else6 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_14 -; RV64ZVE32F-NEXT: .LBB86_7: # %else8 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_14 +; RV64ZVE32F-NEXT: .LBB96_7: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_9 -; RV64ZVE32F-NEXT: .LBB86_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_9 +; 
RV64ZVE32F-NEXT: .LBB96_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: fsd fa5, 0(a3) -; RV64ZVE32F-NEXT: .LBB86_9: # %else10 +; RV64ZVE32F-NEXT: .LBB96_9: # %else10 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_15 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: bnez a2, .LBB86_16 -; RV64ZVE32F-NEXT: .LBB86_11: # %else14 +; RV64ZVE32F-NEXT: bnez a2, .LBB96_16 +; RV64ZVE32F-NEXT: .LBB96_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB86_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: fsd fa2, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_6 -; RV64ZVE32F-NEXT: .LBB86_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_6 +; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -9582,25 +11241,25 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: fsd fa3, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB86_7 -; RV64ZVE32F-NEXT: .LBB86_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a3, .LBB96_7 +; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: fsd fa4, 0(a3) ; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB86_8 -; RV64ZVE32F-NEXT: j .LBB86_9 -; RV64ZVE32F-NEXT: .LBB86_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a3, .LBB96_8 +; RV64ZVE32F-NEXT: j .LBB96_9 +; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: fsd fa6, 0(a3) ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB86_11 -; RV64ZVE32F-NEXT: .LBB86_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a2, .LBB96_11 +; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: and a1, a2, a1 @@ -9623,13 +11282,13 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8i32_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8i32_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf2 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64: ; RV32ZVE32F: # %bb.0: @@ -9640,78 +11299,78 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, 
ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB97_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB87_10 -; RV32ZVE32F-NEXT: .LBB87_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB97_10 +; RV32ZVE32F-NEXT: .LBB97_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB87_11 -; RV32ZVE32F-NEXT: .LBB87_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB97_11 +; RV32ZVE32F-NEXT: .LBB97_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB87_12 -; RV32ZVE32F-NEXT: .LBB87_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB97_12 +; RV32ZVE32F-NEXT: .LBB97_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB87_13 -; RV32ZVE32F-NEXT: .LBB87_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB97_13 +; RV32ZVE32F-NEXT: .LBB97_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB87_14 -; RV32ZVE32F-NEXT: .LBB87_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB97_14 +; RV32ZVE32F-NEXT: .LBB97_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB87_15 -; RV32ZVE32F-NEXT: .LBB87_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB97_15 +; RV32ZVE32F-NEXT: .LBB97_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB87_16 -; RV32ZVE32F-NEXT: .LBB87_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB97_16 +; RV32ZVE32F-NEXT: .LBB97_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB97_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB87_2 -; RV32ZVE32F-NEXT: .LBB87_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB97_2 +; RV32ZVE32F-NEXT: .LBB97_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB87_3 -; RV32ZVE32F-NEXT: .LBB87_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB97_3 +; RV32ZVE32F-NEXT: .LBB97_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB87_4 -; RV32ZVE32F-NEXT: .LBB87_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB97_4 +; RV32ZVE32F-NEXT: .LBB97_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB87_5 -; RV32ZVE32F-NEXT: .LBB87_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB97_5 +; RV32ZVE32F-NEXT: .LBB97_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB87_6 -; RV32ZVE32F-NEXT: .LBB87_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB97_6 +; RV32ZVE32F-NEXT: .LBB97_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; 
RV32ZVE32F-NEXT: beqz a0, .LBB87_7 -; RV32ZVE32F-NEXT: .LBB87_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB97_7 +; RV32ZVE32F-NEXT: .LBB97_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB87_8 -; RV32ZVE32F-NEXT: .LBB87_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB97_8 +; RV32ZVE32F-NEXT: .LBB97_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -9723,16 +11382,16 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB87_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB97_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) -; RV64ZVE32F-NEXT: .LBB87_2: # %else +; RV64ZVE32F-NEXT: .LBB97_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB87_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB97_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 @@ -9740,68 +11399,68 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) -; RV64ZVE32F-NEXT: .LBB87_4: # %else2 +; RV64ZVE32F-NEXT: .LBB97_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB87_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB87_13 -; RV64ZVE32F-NEXT: .LBB87_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_13 +; RV64ZVE32F-NEXT: .LBB97_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB87_14 -; RV64ZVE32F-NEXT: .LBB87_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_14 +; RV64ZVE32F-NEXT: .LBB97_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB87_9 -; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB97_9 +; RV64ZVE32F-NEXT: .LBB97_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) -; RV64ZVE32F-NEXT: .LBB87_9: # %else10 +; RV64ZVE32F-NEXT: .LBB97_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB87_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB87_16 -; RV64ZVE32F-NEXT: .LBB87_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB97_16 +; RV64ZVE32F-NEXT: .LBB97_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB87_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB97_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; 
RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB87_6 -; RV64ZVE32F-NEXT: .LBB87_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB97_6 +; RV64ZVE32F-NEXT: .LBB97_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB87_7 -; RV64ZVE32F-NEXT: .LBB87_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB97_7 +; RV64ZVE32F-NEXT: .LBB97_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB87_8 -; RV64ZVE32F-NEXT: j .LBB87_9 -; RV64ZVE32F-NEXT: .LBB87_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_8 +; RV64ZVE32F-NEXT: j .LBB97_9 +; RV64ZVE32F-NEXT: .LBB97_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB87_11 -; RV64ZVE32F-NEXT: .LBB87_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB97_11 +; RV64ZVE32F-NEXT: .LBB97_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -9822,13 +11481,13 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsext.vf2 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64: ; RV32ZVE32F: # %bb.0: @@ -9839,78 +11498,78 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB98_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB88_10 -; RV32ZVE32F-NEXT: .LBB88_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB98_10 +; RV32ZVE32F-NEXT: .LBB98_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB88_11 -; RV32ZVE32F-NEXT: .LBB88_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB98_11 +; RV32ZVE32F-NEXT: .LBB98_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB88_12 -; RV32ZVE32F-NEXT: .LBB88_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB98_12 +; RV32ZVE32F-NEXT: .LBB98_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB88_13 -; RV32ZVE32F-NEXT: .LBB88_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB98_13 +; RV32ZVE32F-NEXT: .LBB98_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB88_14 -; RV32ZVE32F-NEXT: .LBB88_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB98_14 +; 
RV32ZVE32F-NEXT: .LBB98_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB88_15 -; RV32ZVE32F-NEXT: .LBB88_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB98_15 +; RV32ZVE32F-NEXT: .LBB98_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB88_16 -; RV32ZVE32F-NEXT: .LBB88_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB98_16 +; RV32ZVE32F-NEXT: .LBB98_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB88_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB98_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB88_2 -; RV32ZVE32F-NEXT: .LBB88_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB98_2 +; RV32ZVE32F-NEXT: .LBB98_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB88_3 -; RV32ZVE32F-NEXT: .LBB88_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB98_3 +; RV32ZVE32F-NEXT: .LBB98_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB88_4 -; RV32ZVE32F-NEXT: .LBB88_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB98_4 +; RV32ZVE32F-NEXT: .LBB98_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB88_5 -; RV32ZVE32F-NEXT: .LBB88_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB98_5 +; RV32ZVE32F-NEXT: .LBB98_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB88_6 -; RV32ZVE32F-NEXT: .LBB88_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB98_6 +; RV32ZVE32F-NEXT: .LBB98_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB88_7 -; RV32ZVE32F-NEXT: .LBB88_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB98_7 +; RV32ZVE32F-NEXT: .LBB98_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB88_8 -; RV32ZVE32F-NEXT: .LBB88_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB98_8 +; RV32ZVE32F-NEXT: .LBB98_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -9922,16 +11581,16 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB88_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB98_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) -; RV64ZVE32F-NEXT: .LBB88_2: # %else +; RV64ZVE32F-NEXT: .LBB98_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB88_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB98_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 @@ -9939,68 +11598,68 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) -; RV64ZVE32F-NEXT: .LBB88_4: # %else2 +; RV64ZVE32F-NEXT: .LBB98_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_13 -; RV64ZVE32F-NEXT: .LBB88_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_13 +; RV64ZVE32F-NEXT: .LBB98_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_14 -; RV64ZVE32F-NEXT: .LBB88_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_14 +; RV64ZVE32F-NEXT: .LBB98_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB88_9 -; RV64ZVE32F-NEXT: .LBB88_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB98_9 +; RV64ZVE32F-NEXT: .LBB98_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) -; RV64ZVE32F-NEXT: .LBB88_9: # %else10 +; RV64ZVE32F-NEXT: .LBB98_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB88_16 -; RV64ZVE32F-NEXT: .LBB88_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB98_16 +; RV64ZVE32F-NEXT: .LBB98_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB88_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB98_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB88_6 -; RV64ZVE32F-NEXT: .LBB88_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB98_6 +; RV64ZVE32F-NEXT: .LBB98_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB88_7 -; RV64ZVE32F-NEXT: .LBB88_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB98_7 +; RV64ZVE32F-NEXT: .LBB98_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_8 -; RV64ZVE32F-NEXT: j .LBB88_9 -; RV64ZVE32F-NEXT: .LBB88_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_8 +; RV64ZVE32F-NEXT: j .LBB98_9 +; RV64ZVE32F-NEXT: .LBB98_15: # 
%cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB88_11 -; RV64ZVE32F-NEXT: .LBB88_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB98_11 +; RV64ZVE32F-NEXT: .LBB98_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -10022,13 +11681,13 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vzext.vf2 v16, v12 +; RV64V-NEXT: vsll.vi v12, v16, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64: ; RV32ZVE32F: # %bb.0: @@ -10039,78 +11698,78 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB99_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB89_10 -; RV32ZVE32F-NEXT: .LBB89_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB99_10 +; RV32ZVE32F-NEXT: .LBB99_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB89_11 -; RV32ZVE32F-NEXT: .LBB89_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB99_11 +; RV32ZVE32F-NEXT: .LBB99_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB89_12 -; RV32ZVE32F-NEXT: .LBB89_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB99_12 +; RV32ZVE32F-NEXT: .LBB99_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB89_13 -; RV32ZVE32F-NEXT: .LBB89_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB99_13 +; RV32ZVE32F-NEXT: .LBB99_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB89_14 -; RV32ZVE32F-NEXT: .LBB89_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB99_14 +; RV32ZVE32F-NEXT: .LBB99_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB89_15 -; RV32ZVE32F-NEXT: .LBB89_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB99_15 +; RV32ZVE32F-NEXT: .LBB99_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB89_16 -; RV32ZVE32F-NEXT: .LBB89_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB99_16 +; RV32ZVE32F-NEXT: .LBB99_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB99_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB89_2 -; RV32ZVE32F-NEXT: .LBB89_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB99_2 +; RV32ZVE32F-NEXT: .LBB99_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB89_3 -; RV32ZVE32F-NEXT: 
.LBB89_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB99_3 +; RV32ZVE32F-NEXT: .LBB99_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB89_4 -; RV32ZVE32F-NEXT: .LBB89_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB99_4 +; RV32ZVE32F-NEXT: .LBB99_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB89_5 -; RV32ZVE32F-NEXT: .LBB89_13: # %cond.store7 +; RV32ZVE32F-NEXT: beqz a0, .LBB99_5 +; RV32ZVE32F-NEXT: .LBB99_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB89_6 -; RV32ZVE32F-NEXT: .LBB89_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB99_6 +; RV32ZVE32F-NEXT: .LBB99_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB89_7 -; RV32ZVE32F-NEXT: .LBB89_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB99_7 +; RV32ZVE32F-NEXT: .LBB99_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB89_8 -; RV32ZVE32F-NEXT: .LBB89_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB99_8 +; RV32ZVE32F-NEXT: .LBB99_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -10122,7 +11781,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB89_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB99_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10130,9 +11789,9 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: srli a2, a2, 29 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) -; RV64ZVE32F-NEXT: .LBB89_2: # %else +; RV64ZVE32F-NEXT: .LBB99_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB89_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB99_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 @@ -10141,47 +11800,47 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: srli a2, a2, 29 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) -; RV64ZVE32F-NEXT: .LBB89_4: # %else2 +; RV64ZVE32F-NEXT: .LBB99_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: 
vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB89_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB99_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB89_13 -; RV64ZVE32F-NEXT: .LBB89_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB99_13 +; RV64ZVE32F-NEXT: .LBB99_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB89_14 -; RV64ZVE32F-NEXT: .LBB89_7: # %else8 +; RV64ZVE32F-NEXT: bnez a2, .LBB99_14 +; RV64ZVE32F-NEXT: .LBB99_7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB89_9 -; RV64ZVE32F-NEXT: .LBB89_8: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a2, .LBB99_9 +; RV64ZVE32F-NEXT: .LBB99_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 32 ; RV64ZVE32F-NEXT: srli a2, a2, 29 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) -; RV64ZVE32F-NEXT: .LBB89_9: # %else10 +; RV64ZVE32F-NEXT: .LBB99_9: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB89_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB99_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB89_16 -; RV64ZVE32F-NEXT: .LBB89_11: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB99_16 +; RV64ZVE32F-NEXT: .LBB99_11: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB89_12: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB99_12: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 32 ; RV64ZVE32F-NEXT: srli a2, a2, 29 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB89_6 -; RV64ZVE32F-NEXT: .LBB89_13: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB99_6 +; RV64ZVE32F-NEXT: .LBB99_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 32 @@ -10189,25 +11848,25 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB89_7 -; RV64ZVE32F-NEXT: .LBB89_14: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB99_7 +; RV64ZVE32F-NEXT: .LBB99_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 32 ; RV64ZVE32F-NEXT: srli a2, a2, 29 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB89_8 -; RV64ZVE32F-NEXT: j .LBB89_9 -; RV64ZVE32F-NEXT: .LBB89_15: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB99_8 +; RV64ZVE32F-NEXT: j .LBB99_9 +; RV64ZVE32F-NEXT: .LBB99_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 32 ; RV64ZVE32F-NEXT: srli a2, a2, 29 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB89_11 -; RV64ZVE32F-NEXT: .LBB89_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB99_11 +; RV64ZVE32F-NEXT: .LBB99_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 32 @@ -10231,12 +11890,12 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v8f64: -; RV64: # %bb.0: -; 
RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsll.vi v12, v12, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v8f64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vsll.vi v12, v12, 3 +; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64: ; RV32ZVE32F: # %bb.0: @@ -10263,78 +11922,78 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx ; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_9 +; RV32ZVE32F-NEXT: bnez a2, .LBB100_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB90_10 -; RV32ZVE32F-NEXT: .LBB90_2: # %else2 +; RV32ZVE32F-NEXT: bnez a0, .LBB100_10 +; RV32ZVE32F-NEXT: .LBB100_2: # %else2 ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB90_11 -; RV32ZVE32F-NEXT: .LBB90_3: # %else4 +; RV32ZVE32F-NEXT: bnez a0, .LBB100_11 +; RV32ZVE32F-NEXT: .LBB100_3: # %else4 ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB90_12 -; RV32ZVE32F-NEXT: .LBB90_4: # %else6 +; RV32ZVE32F-NEXT: bnez a0, .LBB100_12 +; RV32ZVE32F-NEXT: .LBB100_4: # %else6 ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB90_13 -; RV32ZVE32F-NEXT: .LBB90_5: # %else8 +; RV32ZVE32F-NEXT: bnez a0, .LBB100_13 +; RV32ZVE32F-NEXT: .LBB100_5: # %else8 ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB90_14 -; RV32ZVE32F-NEXT: .LBB90_6: # %else10 +; RV32ZVE32F-NEXT: bnez a0, .LBB100_14 +; RV32ZVE32F-NEXT: .LBB100_6: # %else10 ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB90_15 -; RV32ZVE32F-NEXT: .LBB90_7: # %else12 +; RV32ZVE32F-NEXT: bnez a0, .LBB100_15 +; RV32ZVE32F-NEXT: .LBB100_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB90_16 -; RV32ZVE32F-NEXT: .LBB90_8: # %else14 +; RV32ZVE32F-NEXT: bnez a0, .LBB100_16 +; RV32ZVE32F-NEXT: .LBB100_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store +; RV32ZVE32F-NEXT: .LBB100_9: # %cond.store ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB90_2 -; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1 +; RV32ZVE32F-NEXT: beqz a0, .LBB100_2 +; RV32ZVE32F-NEXT: .LBB100_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB90_3 -; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3 +; RV32ZVE32F-NEXT: beqz a0, .LBB100_3 +; RV32ZVE32F-NEXT: .LBB100_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB90_4 -; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5 +; RV32ZVE32F-NEXT: beqz a0, .LBB100_4 +; RV32ZVE32F-NEXT: .LBB100_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB90_5 -; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7 +; RV32ZVE32F-NEXT: 
beqz a0, .LBB100_5 +; RV32ZVE32F-NEXT: .LBB100_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB90_6 -; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9 +; RV32ZVE32F-NEXT: beqz a0, .LBB100_6 +; RV32ZVE32F-NEXT: .LBB100_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB90_7 -; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11 +; RV32ZVE32F-NEXT: beqz a0, .LBB100_7 +; RV32ZVE32F-NEXT: .LBB100_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0) ; RV32ZVE32F-NEXT: andi a0, a1, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB90_8 -; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13 +; RV32ZVE32F-NEXT: beqz a0, .LBB100_8 +; RV32ZVE32F-NEXT: .LBB100_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 @@ -10353,74 +12012,74 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi t2, a3, 1 -; RV64ZVE32F-NEXT: bnez t2, .LBB90_9 +; RV64ZVE32F-NEXT: bnez t2, .LBB100_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a1, a3, 2 -; RV64ZVE32F-NEXT: bnez a1, .LBB90_10 -; RV64ZVE32F-NEXT: .LBB90_2: # %else2 +; RV64ZVE32F-NEXT: bnez a1, .LBB100_10 +; RV64ZVE32F-NEXT: .LBB100_2: # %else2 ; RV64ZVE32F-NEXT: andi a1, a3, 4 -; RV64ZVE32F-NEXT: bnez a1, .LBB90_11 -; RV64ZVE32F-NEXT: .LBB90_3: # %else4 +; RV64ZVE32F-NEXT: bnez a1, .LBB100_11 +; RV64ZVE32F-NEXT: .LBB100_3: # %else4 ; RV64ZVE32F-NEXT: andi a1, a3, 8 -; RV64ZVE32F-NEXT: bnez a1, .LBB90_12 -; RV64ZVE32F-NEXT: .LBB90_4: # %else6 +; RV64ZVE32F-NEXT: bnez a1, .LBB100_12 +; RV64ZVE32F-NEXT: .LBB100_4: # %else6 ; RV64ZVE32F-NEXT: andi a1, a3, 16 -; RV64ZVE32F-NEXT: bnez a1, .LBB90_13 -; RV64ZVE32F-NEXT: .LBB90_5: # %else8 +; RV64ZVE32F-NEXT: bnez a1, .LBB100_13 +; RV64ZVE32F-NEXT: .LBB100_5: # %else8 ; RV64ZVE32F-NEXT: andi a1, a3, 32 -; RV64ZVE32F-NEXT: bnez a1, .LBB90_14 -; RV64ZVE32F-NEXT: .LBB90_6: # %else10 +; RV64ZVE32F-NEXT: bnez a1, .LBB100_14 +; RV64ZVE32F-NEXT: .LBB100_6: # %else10 ; RV64ZVE32F-NEXT: andi a1, a3, 64 -; RV64ZVE32F-NEXT: bnez a1, .LBB90_15 -; RV64ZVE32F-NEXT: .LBB90_7: # %else12 +; RV64ZVE32F-NEXT: bnez a1, .LBB100_15 +; RV64ZVE32F-NEXT: .LBB100_7: # %else12 ; RV64ZVE32F-NEXT: andi a1, a3, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB90_16 -; RV64ZVE32F-NEXT: .LBB90_8: # %else14 +; RV64ZVE32F-NEXT: bnez a1, .LBB100_16 +; RV64ZVE32F-NEXT: .LBB100_8: # %else14 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store +; RV64ZVE32F-NEXT: .LBB100_9: # %cond.store ; RV64ZVE32F-NEXT: ld a1, 0(a1) ; RV64ZVE32F-NEXT: slli a1, a1, 3 ; RV64ZVE32F-NEXT: add a1, a0, a1 ; RV64ZVE32F-NEXT: fsd fa0, 0(a1) ; RV64ZVE32F-NEXT: andi a1, a3, 2 -; RV64ZVE32F-NEXT: beqz a1, .LBB90_2 -; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1 +; RV64ZVE32F-NEXT: beqz a1, .LBB100_2 +; RV64ZVE32F-NEXT: .LBB100_10: # %cond.store1 ; RV64ZVE32F-NEXT: slli t1, t1, 3 ; RV64ZVE32F-NEXT: add t1, a0, t1 ; RV64ZVE32F-NEXT: fsd 
fa1, 0(t1) ; RV64ZVE32F-NEXT: andi a1, a3, 4 -; RV64ZVE32F-NEXT: beqz a1, .LBB90_3 -; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3 +; RV64ZVE32F-NEXT: beqz a1, .LBB100_3 +; RV64ZVE32F-NEXT: .LBB100_11: # %cond.store3 ; RV64ZVE32F-NEXT: slli t0, t0, 3 ; RV64ZVE32F-NEXT: add t0, a0, t0 ; RV64ZVE32F-NEXT: fsd fa2, 0(t0) ; RV64ZVE32F-NEXT: andi a1, a3, 8 -; RV64ZVE32F-NEXT: beqz a1, .LBB90_4 -; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a1, .LBB100_4 +; RV64ZVE32F-NEXT: .LBB100_12: # %cond.store5 ; RV64ZVE32F-NEXT: slli a7, a7, 3 ; RV64ZVE32F-NEXT: add a7, a0, a7 ; RV64ZVE32F-NEXT: fsd fa3, 0(a7) ; RV64ZVE32F-NEXT: andi a1, a3, 16 -; RV64ZVE32F-NEXT: beqz a1, .LBB90_5 -; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a1, .LBB100_5 +; RV64ZVE32F-NEXT: .LBB100_13: # %cond.store7 ; RV64ZVE32F-NEXT: slli a6, a6, 3 ; RV64ZVE32F-NEXT: add a6, a0, a6 ; RV64ZVE32F-NEXT: fsd fa4, 0(a6) ; RV64ZVE32F-NEXT: andi a1, a3, 32 -; RV64ZVE32F-NEXT: beqz a1, .LBB90_6 -; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9 +; RV64ZVE32F-NEXT: beqz a1, .LBB100_6 +; RV64ZVE32F-NEXT: .LBB100_14: # %cond.store9 ; RV64ZVE32F-NEXT: slli a5, a5, 3 ; RV64ZVE32F-NEXT: add a5, a0, a5 ; RV64ZVE32F-NEXT: fsd fa5, 0(a5) ; RV64ZVE32F-NEXT: andi a1, a3, 64 -; RV64ZVE32F-NEXT: beqz a1, .LBB90_7 -; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11 +; RV64ZVE32F-NEXT: beqz a1, .LBB100_7 +; RV64ZVE32F-NEXT: .LBB100_15: # %cond.store11 ; RV64ZVE32F-NEXT: slli a4, a4, 3 ; RV64ZVE32F-NEXT: add a4, a0, a4 ; RV64ZVE32F-NEXT: fsd fa6, 0(a4) ; RV64ZVE32F-NEXT: andi a1, a3, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB90_8 -; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a1, .LBB100_8 +; RV64ZVE32F-NEXT: .LBB100_16: # %cond.store13 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a0, a0, a2 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) @@ -10441,28 +12100,28 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v16i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v9 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v16i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v9 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v16i8: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB101_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vse8.v v8, (a2) -; RV64ZVE32F-NEXT: .LBB91_2: # %else +; RV64ZVE32F-NEXT: .LBB101_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB101_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 @@ -10471,30 +12130,30 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vse8.v 
v10, (a2) -; RV64ZVE32F-NEXT: .LBB91_4: # %else2 +; RV64ZVE32F-NEXT: .LBB101_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB91_25 +; RV64ZVE32F-NEXT: bnez a2, .LBB101_25 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB91_26 -; RV64ZVE32F-NEXT: .LBB91_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB101_26 +; RV64ZVE32F-NEXT: .LBB101_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_8 -; RV64ZVE32F-NEXT: .LBB91_7: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB101_8 +; RV64ZVE32F-NEXT: .LBB101_7: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB91_8: # %else8 +; RV64ZVE32F-NEXT: .LBB101_8: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_10 +; RV64ZVE32F-NEXT: beqz a2, .LBB101_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 @@ -10503,21 +12162,21 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB91_10: # %else10 +; RV64ZVE32F-NEXT: .LBB101_10: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB91_27 +; RV64ZVE32F-NEXT: bnez a2, .LBB101_27 ; RV64ZVE32F-NEXT: # %bb.11: # %else12 ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: bnez a2, .LBB91_28 -; RV64ZVE32F-NEXT: .LBB91_12: # %else14 +; RV64ZVE32F-NEXT: bnez a2, .LBB101_28 +; RV64ZVE32F-NEXT: .LBB101_12: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: bnez a2, .LBB91_29 -; RV64ZVE32F-NEXT: .LBB91_13: # %else16 +; RV64ZVE32F-NEXT: bnez a2, .LBB101_29 +; RV64ZVE32F-NEXT: .LBB101_13: # %else16 ; RV64ZVE32F-NEXT: andi a2, a1, 512 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_15 -; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store17 +; RV64ZVE32F-NEXT: beqz a2, .LBB101_15 +; RV64ZVE32F-NEXT: .LBB101_14: # %cond.store17 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -10525,23 +12184,23 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) -; RV64ZVE32F-NEXT: .LBB91_15: # %else18 +; RV64ZVE32F-NEXT: .LBB101_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB91_30 +; RV64ZVE32F-NEXT: bnez a2, .LBB101_30 ; RV64ZVE32F-NEXT: # %bb.16: # %else20 ; RV64ZVE32F-NEXT: slli a2, a1, 52 -; RV64ZVE32F-NEXT: bltz a2, .LBB91_31 -; RV64ZVE32F-NEXT: .LBB91_17: # %else22 +; 
RV64ZVE32F-NEXT: bltz a2, .LBB101_31 +; RV64ZVE32F-NEXT: .LBB101_17: # %else22 ; RV64ZVE32F-NEXT: slli a2, a1, 51 -; RV64ZVE32F-NEXT: bltz a2, .LBB91_32 -; RV64ZVE32F-NEXT: .LBB91_18: # %else24 +; RV64ZVE32F-NEXT: bltz a2, .LBB101_32 +; RV64ZVE32F-NEXT: .LBB101_18: # %else24 ; RV64ZVE32F-NEXT: slli a2, a1, 50 -; RV64ZVE32F-NEXT: bgez a2, .LBB91_20 -; RV64ZVE32F-NEXT: .LBB91_19: # %cond.store25 +; RV64ZVE32F-NEXT: bgez a2, .LBB101_20 +; RV64ZVE32F-NEXT: .LBB101_19: # %cond.store25 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 @@ -10549,21 +12208,21 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) -; RV64ZVE32F-NEXT: .LBB91_20: # %else26 +; RV64ZVE32F-NEXT: .LBB101_20: # %else26 ; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 -; RV64ZVE32F-NEXT: bgez a2, .LBB91_22 +; RV64ZVE32F-NEXT: bgez a2, .LBB101_22 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) -; RV64ZVE32F-NEXT: .LBB91_22: # %else28 +; RV64ZVE32F-NEXT: .LBB101_22: # %else28 ; RV64ZVE32F-NEXT: lui a2, 1048568 ; RV64ZVE32F-NEXT: and a1, a1, a2 -; RV64ZVE32F-NEXT: beqz a1, .LBB91_24 +; RV64ZVE32F-NEXT: beqz a1, .LBB101_24 ; RV64ZVE32F-NEXT: # %bb.23: # %cond.store29 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 @@ -10572,17 +12231,17 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15 ; RV64ZVE32F-NEXT: vse8.v v8, (a0) -; RV64ZVE32F-NEXT: .LBB91_24: # %else30 +; RV64ZVE32F-NEXT: .LBB101_24: # %else30 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB91_25: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB101_25: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_6 -; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB101_6 +; RV64ZVE32F-NEXT: .LBB101_26: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 @@ -10591,17 +12250,17 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB91_7 -; RV64ZVE32F-NEXT: j .LBB91_8 -; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB101_7 +; RV64ZVE32F-NEXT: j .LBB101_8 +; RV64ZVE32F-NEXT: .LBB101_27: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_12 -; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a2, 
.LBB101_12 +; RV64ZVE32F-NEXT: .LBB101_28: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -10610,25 +12269,25 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: beqz a2, .LBB91_13 -; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store15 +; RV64ZVE32F-NEXT: beqz a2, .LBB101_13 +; RV64ZVE32F-NEXT: .LBB101_29: # %cond.store15 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 512 -; RV64ZVE32F-NEXT: bnez a2, .LBB91_14 -; RV64ZVE32F-NEXT: j .LBB91_15 -; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store19 +; RV64ZVE32F-NEXT: bnez a2, .LBB101_14 +; RV64ZVE32F-NEXT: j .LBB101_15 +; RV64ZVE32F-NEXT: .LBB101_30: # %cond.store19 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 52 -; RV64ZVE32F-NEXT: bgez a2, .LBB91_17 -; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21 +; RV64ZVE32F-NEXT: bgez a2, .LBB101_17 +; RV64ZVE32F-NEXT: .LBB101_31: # %cond.store21 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 @@ -10637,16 +12296,16 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 51 -; RV64ZVE32F-NEXT: bgez a2, .LBB91_18 -; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23 +; RV64ZVE32F-NEXT: bgez a2, .LBB101_18 +; RV64ZVE32F-NEXT: .LBB101_32: # %cond.store23 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 50 -; RV64ZVE32F-NEXT: bltz a2, .LBB91_19 -; RV64ZVE32F-NEXT: j .LBB91_20 +; RV64ZVE32F-NEXT: bltz a2, .LBB101_19 +; RV64ZVE32F-NEXT: j .LBB101_20 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m) ret void @@ -10664,37 +12323,37 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_baseidx_v32i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vslidedown.vi v10, v10, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_baseidx_v32i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v10 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; 
RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64V-NEXT: vslidedown.vi v8, v8, 16 +; RV64V-NEXT: vslidedown.vi v10, v10, 16 +; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64V-NEXT: vslidedown.vi v0, v0, 2 +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v10 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 ; RV64ZVE32F-NEXT: andi a2, a1, 1 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_2 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vse8.v v8, (a2) -; RV64ZVE32F-NEXT: .LBB92_2: # %else +; RV64ZVE32F-NEXT: .LBB102_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_4 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 @@ -10703,30 +12362,30 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_4: # %else2 +; RV64ZVE32F-NEXT: .LBB102_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB92_49 +; RV64ZVE32F-NEXT: bnez a2, .LBB102_49 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB92_50 -; RV64ZVE32F-NEXT: .LBB92_6: # %else6 +; RV64ZVE32F-NEXT: bnez a2, .LBB102_50 +; RV64ZVE32F-NEXT: .LBB102_6: # %else6 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_8 -; RV64ZVE32F-NEXT: .LBB92_7: # %cond.store7 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_8 +; RV64ZVE32F-NEXT: .LBB102_7: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_8: # %else8 +; RV64ZVE32F-NEXT: .LBB102_8: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_10 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1 @@ -10735,21 +12394,21 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) -; RV64ZVE32F-NEXT: .LBB92_10: # %else10 +; RV64ZVE32F-NEXT: .LBB102_10: # %else10 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB92_51 +; RV64ZVE32F-NEXT: bnez a2, .LBB102_51 ; RV64ZVE32F-NEXT: # %bb.11: # %else12 ; RV64ZVE32F-NEXT: andi a2, a1, 128 
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_52 -; RV64ZVE32F-NEXT: .LBB92_12: # %else14 +; RV64ZVE32F-NEXT: bnez a2, .LBB102_52 +; RV64ZVE32F-NEXT: .LBB102_12: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: bnez a2, .LBB92_53 -; RV64ZVE32F-NEXT: .LBB92_13: # %else16 +; RV64ZVE32F-NEXT: bnez a2, .LBB102_53 +; RV64ZVE32F-NEXT: .LBB102_13: # %else16 ; RV64ZVE32F-NEXT: andi a2, a1, 512 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_15 -; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store17 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_15 +; RV64ZVE32F-NEXT: .LBB102_14: # %cond.store17 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 @@ -10757,22 +12416,22 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 9 ; RV64ZVE32F-NEXT: vse8.v v13, (a2) -; RV64ZVE32F-NEXT: .LBB92_15: # %else18 +; RV64ZVE32F-NEXT: .LBB102_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_17 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_17 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) -; RV64ZVE32F-NEXT: .LBB92_17: # %else20 +; RV64ZVE32F-NEXT: .LBB102_17: # %else20 ; RV64ZVE32F-NEXT: slli a2, a1, 52 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_19 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_19 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 @@ -10781,20 +12440,20 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_19: # %else22 +; RV64ZVE32F-NEXT: .LBB102_19: # %else22 ; RV64ZVE32F-NEXT: slli a2, a1, 51 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_21 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 12 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB92_21: # %else24 +; RV64ZVE32F-NEXT: .LBB102_21: # %else24 ; RV64ZVE32F-NEXT: slli a2, a1, 50 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_23 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_23 ; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 1 @@ -10803,21 +12462,21 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) -; RV64ZVE32F-NEXT: .LBB92_23: # %else26 +; RV64ZVE32F-NEXT: .LBB102_23: # %else26 ; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_54 +; 
RV64ZVE32F-NEXT: bltz a2, .LBB102_54 ; RV64ZVE32F-NEXT: # %bb.24: # %else28 ; RV64ZVE32F-NEXT: slli a2, a1, 48 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_55 -; RV64ZVE32F-NEXT: .LBB92_25: # %else30 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_55 +; RV64ZVE32F-NEXT: .LBB102_25: # %else30 ; RV64ZVE32F-NEXT: slli a2, a1, 47 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_56 -; RV64ZVE32F-NEXT: .LBB92_26: # %else32 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_56 +; RV64ZVE32F-NEXT: .LBB102_26: # %else32 ; RV64ZVE32F-NEXT: slli a2, a1, 46 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_28 -; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_28 +; RV64ZVE32F-NEXT: .LBB102_27: # %cond.store33 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 @@ -10826,31 +12485,31 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_28: # %else34 +; RV64ZVE32F-NEXT: .LBB102_28: # %else34 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 ; RV64ZVE32F-NEXT: slli a2, a1, 45 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_57 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_57 ; RV64ZVE32F-NEXT: # %bb.29: # %else36 ; RV64ZVE32F-NEXT: slli a2, a1, 44 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_58 -; RV64ZVE32F-NEXT: .LBB92_30: # %else38 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_58 +; RV64ZVE32F-NEXT: .LBB102_30: # %else38 ; RV64ZVE32F-NEXT: slli a2, a1, 43 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_32 -; RV64ZVE32F-NEXT: .LBB92_31: # %cond.store39 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_32 +; RV64ZVE32F-NEXT: .LBB102_31: # %cond.store39 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_32: # %else40 +; RV64ZVE32F-NEXT: .LBB102_32: # %else40 ; RV64ZVE32F-NEXT: slli a2, a1, 42 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_34 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_34 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1 @@ -10860,21 +12519,21 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_34: # %else42 +; RV64ZVE32F-NEXT: .LBB102_34: # %else42 ; RV64ZVE32F-NEXT: slli a2, a1, 41 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_59 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_59 ; RV64ZVE32F-NEXT: # %bb.35: # %else44 ; RV64ZVE32F-NEXT: slli a2, a1, 40 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_60 -; RV64ZVE32F-NEXT: .LBB92_36: # %else46 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_60 +; RV64ZVE32F-NEXT: .LBB102_36: # %else46 ; RV64ZVE32F-NEXT: slli a2, a1, 39 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_61 -; RV64ZVE32F-NEXT: .LBB92_37: # %else48 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_61 +; RV64ZVE32F-NEXT: .LBB102_37: # %else48 ; 
RV64ZVE32F-NEXT: slli a2, a1, 38 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_39 -; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_39 +; RV64ZVE32F-NEXT: .LBB102_38: # %cond.store49 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 @@ -10883,23 +12542,23 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_39: # %else50 +; RV64ZVE32F-NEXT: .LBB102_39: # %else50 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 ; RV64ZVE32F-NEXT: slli a2, a1, 37 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_62 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_62 ; RV64ZVE32F-NEXT: # %bb.40: # %else52 ; RV64ZVE32F-NEXT: slli a2, a1, 36 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_63 -; RV64ZVE32F-NEXT: .LBB92_41: # %else54 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_63 +; RV64ZVE32F-NEXT: .LBB102_41: # %else54 ; RV64ZVE32F-NEXT: slli a2, a1, 35 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_64 -; RV64ZVE32F-NEXT: .LBB92_42: # %else56 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_64 +; RV64ZVE32F-NEXT: .LBB102_42: # %else56 ; RV64ZVE32F-NEXT: slli a2, a1, 34 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_44 -; RV64ZVE32F-NEXT: .LBB92_43: # %cond.store57 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_44 +; RV64ZVE32F-NEXT: .LBB102_43: # %cond.store57 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -10908,11 +12567,11 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_44: # %else58 +; RV64ZVE32F-NEXT: .LBB102_44: # %else58 ; RV64ZVE32F-NEXT: slli a2, a1, 33 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_46 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_46 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -10920,10 +12579,10 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) -; RV64ZVE32F-NEXT: .LBB92_46: # %else60 +; RV64ZVE32F-NEXT: .LBB102_46: # %else60 ; RV64ZVE32F-NEXT: lui a2, 524288 ; RV64ZVE32F-NEXT: and a1, a1, a2 -; RV64ZVE32F-NEXT: beqz a1, .LBB92_48 +; RV64ZVE32F-NEXT: beqz a1, .LBB102_48 ; RV64ZVE32F-NEXT: # %bb.47: # %cond.store61 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 @@ -10933,17 +12592,17 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v8, (a0) -; RV64ZVE32F-NEXT: .LBB92_48: # %else62 +; RV64ZVE32F-NEXT: .LBB102_48: # %else62 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB92_49: # %cond.store3 +; RV64ZVE32F-NEXT: .LBB102_49: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, 
m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_6 -; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_6 +; RV64ZVE32F-NEXT: .LBB102_50: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 @@ -10952,17 +12611,17 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB92_7 -; RV64ZVE32F-NEXT: j .LBB92_8 -; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store11 +; RV64ZVE32F-NEXT: bnez a2, .LBB102_7 +; RV64ZVE32F-NEXT: j .LBB102_8 +; RV64ZVE32F-NEXT: .LBB102_51: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_12 -; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store13 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_12 +; RV64ZVE32F-NEXT: .LBB102_52: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 @@ -10971,25 +12630,25 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7 ; RV64ZVE32F-NEXT: vse8.v v13, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: beqz a2, .LBB92_13 -; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store15 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_13 +; RV64ZVE32F-NEXT: .LBB102_53: # %cond.store15 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8 ; RV64ZVE32F-NEXT: vse8.v v13, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 512 -; RV64ZVE32F-NEXT: bnez a2, .LBB92_14 -; RV64ZVE32F-NEXT: j .LBB92_15 -; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store27 +; RV64ZVE32F-NEXT: bnez a2, .LBB102_14 +; RV64ZVE32F-NEXT: j .LBB102_15 +; RV64ZVE32F-NEXT: .LBB102_54: # %cond.store27 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 48 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_25 -; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store29 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_25 +; RV64ZVE32F-NEXT: .LBB102_55: # %cond.store29 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 @@ -10998,8 +12657,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 47 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_26 -; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store31 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_26 +; RV64ZVE32F-NEXT: .LBB102_56: # %cond.store31 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -11007,9 +12666,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; 
RV64ZVE32F-NEXT: slli a2, a1, 46 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_27 -; RV64ZVE32F-NEXT: j .LBB92_28 -; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store35 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_27 +; RV64ZVE32F-NEXT: j .LBB102_28 +; RV64ZVE32F-NEXT: .LBB102_57: # %cond.store35 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma @@ -11017,8 +12676,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 44 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_30 -; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_30 +; RV64ZVE32F-NEXT: .LBB102_58: # %cond.store37 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 @@ -11028,9 +12687,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 43 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_31 -; RV64ZVE32F-NEXT: j .LBB92_32 -; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store43 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_31 +; RV64ZVE32F-NEXT: j .LBB102_32 +; RV64ZVE32F-NEXT: .LBB102_59: # %cond.store43 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma @@ -11038,8 +12697,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 40 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_36 -; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store45 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_36 +; RV64ZVE32F-NEXT: .LBB102_60: # %cond.store45 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 @@ -11049,8 +12708,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 39 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_37 -; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store47 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_37 +; RV64ZVE32F-NEXT: .LBB102_61: # %cond.store47 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -11058,9 +12717,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 38 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_38 -; RV64ZVE32F-NEXT: j .LBB92_39 -; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store51 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_38 +; RV64ZVE32F-NEXT: j .LBB102_39 +; RV64ZVE32F-NEXT: .LBB102_62: # %cond.store51 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma @@ -11068,8 +12727,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 36 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_41 -; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_41 +; RV64ZVE32F-NEXT: .LBB102_63: # 
%cond.store53 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -11079,8 +12738,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 35 -; RV64ZVE32F-NEXT: bgez a2, .LBB92_42 -; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55 +; RV64ZVE32F-NEXT: bgez a2, .LBB102_42 +; RV64ZVE32F-NEXT: .LBB102_64: # %cond.store55 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -11088,8 +12747,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 34 -; RV64ZVE32F-NEXT: bltz a2, .LBB92_43 -; RV64ZVE32F-NEXT: j .LBB92_44 +; RV64ZVE32F-NEXT: bltz a2, .LBB102_43 +; RV64ZVE32F-NEXT: j .LBB102_44 %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m) ret void @@ -11140,13 +12799,13 @@ define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) { ; RV32-NEXT: vse16.v v9, (a0) ; RV32-NEXT: ret ; -; RV64-LABEL: mscatter_shuffle_rotate: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 4 -; RV64-NEXT: vslideup.vi v9, v8, 4 -; RV64-NEXT: vse16.v v9, (a0) -; RV64-NEXT: ret +; RV64V-LABEL: mscatter_shuffle_rotate: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vslidedown.vi v9, v8, 4 +; RV64V-NEXT: vslideup.vi v9, v8, 4 +; RV64V-NEXT: vse16.v v9, (a0) +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_shuffle_rotate: ; RV64ZVE32F: # %bb.0: @@ -11178,3 +12837,11 @@ define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) { call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true)) ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32V-ZVFH: {{.*}} +; RV32V-ZVFHMIN: {{.*}} +; RV32ZVE32F-ZVFH: {{.*}} +; RV32ZVE32F-ZVFHMIN: {{.*}} +; RV64: {{.*}} +; RV64V-ZVFH: {{.*}} +; RV64V-ZVFHMIN: {{.*}}