[RISCV] Lower fixed-length {insert,extract}_vector_elt on zvfhmin/zvfbfmin #114927
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes:

RISCVTargetLowering::lower{INSERT,EXTRACT}_VECTOR_ELT already handles f16 and bf16 scalable vectors after #110221, so we can reuse it for fixed-length vectors.

Patch is 121.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114927.diff

6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d2d03d4572dac8..b48cd1f2427d06 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1333,7 +1333,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// expansion to a build_vector of 0s.
setOperationAction(ISD::UNDEF, VT, Custom);
- setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+ ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE,
ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
VT, Custom);
@@ -1404,10 +1405,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
continue;
}
- setOperationAction({ISD::BUILD_VECTOR,
- ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
- ISD::SCALAR_TO_VECTOR},
- VT, Custom);
+ setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
+ Custom);
setOperationAction(
{ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index f2052ccc462793..cb830d668d2e8c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32NOM
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64NOM
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64M
+
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M
define i8 @extractelt_v16i8(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16i8:
@@ -66,14 +69,37 @@ define i64 @extractelt_v2i64(ptr %x) nounwind {
ret i64 %b
}
-define half @extractelt_v8f16(ptr %x) nounwind {
-; CHECK-LABEL: extractelt_v8f16:
+define bfloat @extractelt_v8bf16(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = extractelement <8 x bfloat> %a, i32 7
+ ret bfloat %b
+}
+
+define half @extractelt_v8f16(ptr %x) nounwind {
+; ZVFH-LABEL: extractelt_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = extractelement <8 x half> %a, i32 7
ret half %b
@@ -171,15 +197,40 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
ret i64 %b
}
-define half @extractelt_v16f16(ptr %x) nounwind {
-; CHECK-LABEL: extractelt_v16f16:
+define bfloat @extractelt_v16bf16(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+ %a = load <16 x bfloat>, ptr %x
+ %b = extractelement <16 x bfloat> %a, i32 7
+ ret bfloat %b
+}
+
+define half @extractelt_v16f16(ptr %x) nounwind {
+; ZVFH-LABEL: extractelt_v16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = extractelement <16 x half> %a, i32 7
ret half %b
@@ -398,15 +449,49 @@ define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind {
ret i64 %c
}
-define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
-; CHECK-LABEL: extractelt_v8f16_idx:
+define bfloat @extractelt_v8bf16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v8bf16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v10, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vslidedown.vx v8, v10, a1
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = fadd <8 x bfloat> %a, %a
+ %c = extractelement <8 x bfloat> %b, i32 %idx
+ ret bfloat %c
+}
+
+define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; ZVFH-LABEL: extractelt_v8f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vfadd.vv v8, v8, v8
+; ZVFH-NEXT: vslidedown.vx v8, v8, a1
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v8f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vslidedown.vx v8, v10, a1
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = fadd <8 x half> %a, %a
%c = extractelement <8 x half> %b, i32 %idx
@@ -513,15 +598,49 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
ret i64 %c
}
-define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
-; CHECK-LABEL: extractelt_v16f16_idx:
+define bfloat @extractelt_v16bf16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v16bf16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
+; CHECK-NEXT: vslidedown.vx v8, v12, a1
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+ %a = load <16 x bfloat>, ptr %x
+ %b = fadd <16 x bfloat> %a, %a
+ %c = extractelement <16 x bfloat> %b, i32 %idx
+ ret bfloat %c
+}
+
+define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; ZVFH-LABEL: extractelt_v16f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vfadd.vv v8, v8, v8
+; ZVFH-NEXT: vslidedown.vx v8, v8, a1
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v16f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v12, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
+; ZVFHMIN-NEXT: vslidedown.vx v8, v12, a1
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = fadd <16 x half> %a, %a
%c = extractelement <16 x half> %b, i32 %idx
@@ -939,8 +1058,8 @@ define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_sdiv_v4i32:
; RV32NOM: # %bb.0:
-; RV32NOM-NEXT: lui a0, %hi(.LCPI42_0)
-; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
+; RV32NOM-NEXT: lui a0, %hi(.LCPI46_0)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI46_0)
; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT: vle32.v v9, (a0)
; RV32NOM-NEXT: vmulh.vv v9, v8, v9
@@ -975,8 +1094,8 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
;
; RV64NOM-LABEL: extractelt_sdiv_v4i32:
; RV64NOM: # %bb.0:
-; RV64NOM-NEXT: lui a0, %hi(.LCPI42_0)
-; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
+; RV64NOM-NEXT: lui a0, %hi(.LCPI46_0)
+; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI46_0)
; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT: vle32.v v9, (a0)
; RV64NOM-NEXT: vmulh.vv v9, v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
index bdedc5f33c3a19..3f7cd91737f4b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
@@ -21,58 +21,18 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) {
;
; RV32-ZFBFMIN-LABEL: splat_idx_v4bf16:
; RV32-ZFBFMIN: # %bb.0:
-; RV32-ZFBFMIN-NEXT: addi sp, sp, -48
-; RV32-ZFBFMIN-NEXT: .cfi_def_cfa_offset 48
-; RV32-ZFBFMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZFBFMIN-NEXT: .cfi_offset ra, -4
-; RV32-ZFBFMIN-NEXT: csrr a1, vlenb
-; RV32-ZFBFMIN-NEXT: sub sp, sp, a1
-; RV32-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV32-ZFBFMIN-NEXT: addi a1, sp, 32
-; RV32-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-ZFBFMIN-NEXT: andi a0, a0, 3
-; RV32-ZFBFMIN-NEXT: li a1, 2
-; RV32-ZFBFMIN-NEXT: call __mulsi3
-; RV32-ZFBFMIN-NEXT: addi a1, sp, 16
-; RV32-ZFBFMIN-NEXT: add a0, a1, a0
-; RV32-ZFBFMIN-NEXT: addi a2, sp, 32
-; RV32-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; RV32-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-ZFBFMIN-NEXT: vse16.v v8, (a1)
-; RV32-ZFBFMIN-NEXT: lh a0, 0(a0)
+; RV32-ZFBFMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV32-ZFBFMIN-NEXT: vmv.x.s a0, v8
; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0
-; RV32-ZFBFMIN-NEXT: csrr a0, vlenb
-; RV32-ZFBFMIN-NEXT: add sp, sp, a0
-; RV32-ZFBFMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZFBFMIN-NEXT: addi sp, sp, 48
; RV32-ZFBFMIN-NEXT: ret
;
; RV64-ZFBFMIN-LABEL: splat_idx_v4bf16:
; RV64-ZFBFMIN: # %bb.0:
-; RV64-ZFBFMIN-NEXT: addi sp, sp, -48
-; RV64-ZFBFMIN-NEXT: .cfi_def_cfa_offset 48
-; RV64-ZFBFMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZFBFMIN-NEXT: .cfi_offset ra, -8
-; RV64-ZFBFMIN-NEXT: csrr a1, vlenb
-; RV64-ZFBFMIN-NEXT: sub sp, sp, a1
-; RV64-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV64-ZFBFMIN-NEXT: addi a1, sp, 32
-; RV64-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-ZFBFMIN-NEXT: andi a0, a0, 3
-; RV64-ZFBFMIN-NEXT: li a1, 2
-; RV64-ZFBFMIN-NEXT: call __muldi3
-; RV64-ZFBFMIN-NEXT: addi a1, sp, 16
-; RV64-ZFBFMIN-NEXT: add a0, a1, a0
-; RV64-ZFBFMIN-NEXT: addi a2, sp, 32
-; RV64-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; RV64-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-ZFBFMIN-NEXT: vse16.v v8, (a1)
-; RV64-ZFBFMIN-NEXT: lh a0, 0(a0)
+; RV64-ZFBFMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV64-ZFBFMIN-NEXT: vmv.x.s a0, v8
; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZFBFMIN-NEXT: csrr a0, vlenb
-; RV64-ZFBFMIN-NEXT: add sp, sp, a0
-; RV64-ZFBFMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZFBFMIN-NEXT: addi sp, sp, 48
; RV64-ZFBFMIN-NEXT: ret
%x = extractelement <4 x bfloat> %v, i64 %idx
%ins = insertelement <4 x bfloat> poison, bfloat %x, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 924732e554f0ef..7e219836688406 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -201,58 +201,18 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
;
; RV32-ZFHMIN-LABEL: splat_idx_v4f16:
; RV32-ZFHMIN: # %bb.0:
-; RV32-ZFHMIN-NEXT: addi sp, sp, -48
-; RV32-ZFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV32-ZFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZFHMIN-NEXT: .cfi_offset ra, -4
-; RV32-ZFHMIN-NEXT: csrr a1, vlenb
-; RV32-ZFHMIN-NEXT: sub sp, sp, a1
-; RV32-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV32-ZFHMIN-NEXT: addi a1, sp, 32
-; RV32-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-ZFHMIN-NEXT: andi a0, a0, 3
-; RV32-ZFHMIN-NEXT: li a1, 2
-; RV32-ZFHMIN-NEXT: call __mulsi3
-; RV32-ZFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZFHMIN-NEXT: add a0, a1, a0
-; RV32-ZFHMIN-NEXT: addi a2, sp, 32
-; RV32-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; RV32-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-ZFHMIN-NEXT: vse16.v v8, (a1)
-; RV32-ZFHMIN-NEXT: lh a0, 0(a0)
+; RV32-ZFHMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV32-ZFHMIN-NEXT: vmv.x.s a0, v8
; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0
-; RV32-ZFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZFHMIN-NEXT: add sp, sp, a0
-; RV32-ZFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZFHMIN-NEXT: addi sp, sp, 48
; RV32-ZFHMIN-NEXT: ret
;
; RV64-ZFHMIN-LABEL: splat_idx_v4f16:
; RV64-ZFHMIN: # %bb.0:
-; RV64-ZFHMIN-NEXT: addi sp, sp, -48
-; RV64-ZFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV64-ZFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZFHMIN-NEXT: .cfi_offset ra, -8
-; RV64-ZFHMIN-NEXT: csrr a1, vlenb
-; RV64-ZFHMIN-NEXT: sub sp, sp, a1
-; RV64-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV64-ZFHMIN-NEXT: addi a1, sp, 32
-; RV64-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-ZFHMIN-NEXT: andi a0, a0, 3
-; RV64-ZFHMIN-NEXT: li a1, 2
-; RV64-ZFHMIN-NEXT: call __muldi3
-; RV64-ZFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZFHMIN-NEXT: add a0, a1, a0
-; RV64-ZFHMIN-NEXT: addi a2, sp, 32
-; RV64-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; RV64-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-ZFHMIN-NEXT: vse16.v v8, (a1)
-; RV64-ZFHMIN-NEXT: lh a0, 0(a0)
+; RV64-ZFHMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV64-ZFHMIN-NEXT: vmv.x.s a0, v8
; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZFHMIN-NEXT: add sp, sp, a0
-; RV64-ZFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZFHMIN-NEXT: addi sp, sp, 48
; RV64-ZFHMIN-NEXT: ret
%x = extractelement <4 x half> %v, i64 %idx
%ins = insertelement <4 x half> poison, half %x, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 87f9bfbd1aaff7..55249834ae72de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFHMIN,ZVFHMINRV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFHMIN,ZVFHMINRV64
define <4 x i32> @insertelt_v4i32_0(<4 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v4i32_0:
@@ -673,3 +675,102 @@ define <8 x i64> @insertelt_c5_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range
%v = insertelement <8 x i64> %vin, i64 %a, i32 5
ret <8 x i64> %v
}
+
+define <4 x bfloat> @insertelt_v4bf16_0(<4 x bfloat> %a, bfloat %y) {
+; CHECK-LABEL: insertelt_v4bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %b = insertelement <4 x bfloat> %a, bfloat %y, i32 0
+ ret <4 x bfloat> %b
+}
+
+define <4 x bfloat> @insertelt_v4bf16_3(<4 x bfloat> %a, bfloat %y) {
+; CHECK-LABEL: insertelt_v4bf16_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: ret
+ %b = insertelement <4 x bfloat> %a, bfloat %y, i32 3
+ ret <4 x bfloat> %b
+}
+
+define <4 x bfloat> @insertelt_v4bf16_idx(<4 x bfloat> %a, bfloat %y, i32 zeroext %idx) {
+; CHECK-LABEL: insert...
[truncated]
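
For a self-contained picture of what the truncated diff above is testing, here is a minimal IR sketch (function names are illustrative, not taken from the patch) of a fixed-length bf16 extract and insert that can now go through the shared scalable-vector lowering instead of being scalarized. A RUN line like the ones added to fixed-vectors-extract.ll, e.g. llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d, would exercise it.

; Illustrative sketch only -- function names are hypothetical, mirroring the
; extractelt_v8bf16 / insertelt_v4bf16_3 tests in the diff above.
define bfloat @sketch_extractelt_v8bf16(ptr %x) nounwind {
  %a = load <8 x bfloat>, ptr %x
  %b = extractelement <8 x bfloat> %a, i32 7   ; per the new checks: vslidedown + vmv.x.s + fmv.h.x
  ret bfloat %b
}

define <4 x bfloat> @sketch_insertelt_v4bf16(<4 x bfloat> %a, bfloat %y) nounwind {
  %b = insertelement <4 x bfloat> %a, bfloat %y, i32 3   ; per the new checks: fmv.x.h + vmv.s.x + vslideup
  ret <4 x bfloat> %b
}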
LGTM
…4945) In preparation for allowing zvfhmin and zvfbfmin in isLegalElementTypeForRVV, this lowers fixed-length masked gathers and scatters. We need to mark f16 and bf16 as legal in isLegalMaskedGatherScatter, otherwise ScalarizeMaskedMemIntrin will just scalarize them, but we can move this back into isLegalElementTypeForRVV afterwards. The scalarized codegen required #114938, #114927 and #114915 to not crash.
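
To make the gather/scatter case above concrete, a rough sketch (hypothetical function names; not code from the linked patch, just the standard llvm.masked.gather/llvm.masked.scatter intrinsics) of the fixed-length f16 operation that ScalarizeMaskedMemIntrin would otherwise scalarize:

; Sketch only -- names and values are illustrative.
declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)

define <4 x half> @sketch_mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
  ; With f16 marked legal in isLegalMaskedGatherScatter this stays a vector
  ; operation rather than being scalarized element by element.
  %g = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
  ret <4 x half> %g
}

define void @sketch_mscatter_v4f16(<4 x half> %v, <4 x ptr> %ptrs, <4 x i1> %m) {
  call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %v, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
  ret void
}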
…bfmin (llvm#114927) RISCVTargetLowering::lower{INSERT,EXTRACT}_VECTOR_ELT already handles f16 and bf16 scalable vectors after llvm#110221, so we can reuse it for fixed-length vectors.