
Commit aea6b25

[RISCV] Lower fixed-length {insert,extract}_vector_elt on zvfhmin/zvfbfmin (#114927)
RISCVTargetLowering::lower{INSERT,EXTRACT}_VECTOR_ELT already handles f16 and bf16 scalable vectors after #110221, so we can reuse it for fixed-length vectors.
1 parent a8f8089 commit aea6b25
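
For context, here is a minimal LLVM IR sketch of the operations this change enables; the function name and vector width are illustrative only and not taken from the patch. Fixed-length insertelement/extractelement on bf16 (and on f16 under zvfhmin) can now reuse the custom lowering that #110221 added for scalable vectors, which is what the new ZVFHMIN/ZVFBFMIN RUN lines in the tests below exercise.

; Hypothetical example, not part of this commit: a fixed-length bf16 insert
; followed by an extract. These become ISD::INSERT_VECTOR_ELT and
; ISD::EXTRACT_VECTOR_ELT nodes, which the RISCVISelLowering.cpp hunk below
; now marks Custom for zvfhmin/zvfbfmin fixed-length vector types.
define bfloat @demo_insert_extract(<8 x bfloat> %v, bfloat %s) {
  %ins = insertelement <8 x bfloat> %v, bfloat %s, i32 3
  %ext = extractelement <8 x bfloat> %ins, i32 3
  ret bfloat %ext
}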

6 files changed: +1758, -971 lines

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 5 deletions
@@ -1333,7 +1333,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         // expansion to a build_vector of 0s.
         setOperationAction(ISD::UNDEF, VT, Custom);

-        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+                            ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                             ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE,
                             ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
                            VT, Custom);
@@ -1404,10 +1405,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
        continue;
      }

-      setOperationAction({ISD::BUILD_VECTOR,
-                          ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
-                          ISD::SCALAR_TO_VECTOR},
-                         VT, Custom);
+      setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
+                         Custom);

       setOperationAction(
           {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll

Lines changed: 143 additions & 24 deletions
@@ -1,8 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32NOM
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64NOM
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64M
+
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M

 define i8 @extractelt_v16i8(ptr %x) nounwind {
 ; CHECK-LABEL: extractelt_v16i8:
@@ -66,14 +69,37 @@ define i64 @extractelt_v2i64(ptr %x) nounwind {
   ret i64 %b
 }

-define half @extractelt_v8f16(ptr %x) nounwind {
-; CHECK-LABEL: extractelt_v8f16:
+define bfloat @extractelt_v8bf16(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 7
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
+  %a = load <8 x bfloat>, ptr %x
+  %b = extractelement <8 x bfloat> %a, i32 7
+  ret bfloat %b
+}
+
+define half @extractelt_v8f16(ptr %x) nounwind {
+; ZVFH-LABEL: extractelt_v8f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 7
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_v8f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
+; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = extractelement <8 x half> %a, i32 7
   ret half %b
@@ -171,15 +197,40 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
   ret i64 %b
 }

-define half @extractelt_v16f16(ptr %x) nounwind {
-; CHECK-LABEL: extractelt_v16f16:
+define bfloat @extractelt_v16bf16(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 7
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
+  %a = load <16 x bfloat>, ptr %x
+  %b = extractelement <16 x bfloat> %a, i32 7
+  ret bfloat %b
+}
+
+define half @extractelt_v16f16(ptr %x) nounwind {
+; ZVFH-LABEL: extractelt_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 7
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = extractelement <16 x half> %a, i32 7
   ret half %b
@@ -398,15 +449,49 @@ define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind {
   ret i64 %c
 }

-define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
-; CHECK-LABEL: extractelt_v8f16_idx:
+define bfloat @extractelt_v8bf16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v8bf16_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfadd.vv v8, v10, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT:    vslidedown.vx v8, v10, a1
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
+  %a = load <8 x bfloat>, ptr %x
+  %b = fadd <8 x bfloat> %a, %a
+  %c = extractelement <8 x bfloat> %b, i32 %idx
+  ret bfloat %c
+}
+
+define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; ZVFH-LABEL: extractelt_v8f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vfadd.vv v8, v8, v8
+; ZVFH-NEXT:    vslidedown.vx v8, v8, a1
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_v8f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v10, a1
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
+; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = fadd <8 x half> %a, %a
   %c = extractelement <8 x half> %b, i32 %idx
@@ -513,15 +598,49 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
   ret i64 %c
 }

-define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
-; CHECK-LABEL: extractelt_v16f16_idx:
+define bfloat @extractelt_v16bf16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v16bf16_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfadd.vv v8, v12, v12
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
+; CHECK-NEXT:    vslidedown.vx v8, v12, a1
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
+  %a = load <16 x bfloat>, ptr %x
+  %b = fadd <16 x bfloat> %a, %a
+  %c = extractelement <16 x bfloat> %b, i32 %idx
+  ret bfloat %c
+}
+
+define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; ZVFH-LABEL: extractelt_v16f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vfadd.vv v8, v8, v8
+; ZVFH-NEXT:    vslidedown.vx v8, v8, a1
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_v16f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v12, a1
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = fadd <16 x half> %a, %a
   %c = extractelement <16 x half> %b, i32 %idx
@@ -939,8 +1058,8 @@ define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
 define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV32NOM-LABEL: extractelt_sdiv_v4i32:
 ; RV32NOM:       # %bb.0:
-; RV32NOM-NEXT:    lui a0, %hi(.LCPI42_0)
-; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI42_0)
+; RV32NOM-NEXT:    lui a0, %hi(.LCPI46_0)
+; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI46_0)
 ; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32NOM-NEXT:    vle32.v v9, (a0)
 ; RV32NOM-NEXT:    vmulh.vv v9, v8, v9
@@ -975,8 +1094,8 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ;
 ; RV64NOM-LABEL: extractelt_sdiv_v4i32:
 ; RV64NOM:       # %bb.0:
-; RV64NOM-NEXT:    lui a0, %hi(.LCPI42_0)
-; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI42_0)
+; RV64NOM-NEXT:    lui a0, %hi(.LCPI46_0)
+; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI46_0)
 ; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64NOM-NEXT:    vle32.v v9, (a0)
 ; RV64NOM-NEXT:    vmulh.vv v9, v8, v9

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll

Lines changed: 4 additions & 44 deletions
@@ -21,58 +21,18 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) {
 ;
 ; RV32-ZFBFMIN-LABEL: splat_idx_v4bf16:
 ; RV32-ZFBFMIN:       # %bb.0:
-; RV32-ZFBFMIN-NEXT:    addi sp, sp, -48
-; RV32-ZFBFMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV32-ZFBFMIN-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZFBFMIN-NEXT:    .cfi_offset ra, -4
-; RV32-ZFBFMIN-NEXT:    csrr a1, vlenb
-; RV32-ZFBFMIN-NEXT:    sub sp, sp, a1
-; RV32-ZFBFMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV32-ZFBFMIN-NEXT:    addi a1, sp, 32
-; RV32-ZFBFMIN-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-ZFBFMIN-NEXT:    andi a0, a0, 3
-; RV32-ZFBFMIN-NEXT:    li a1, 2
-; RV32-ZFBFMIN-NEXT:    call __mulsi3
-; RV32-ZFBFMIN-NEXT:    addi a1, sp, 16
-; RV32-ZFBFMIN-NEXT:    add a0, a1, a0
-; RV32-ZFBFMIN-NEXT:    addi a2, sp, 32
-; RV32-ZFBFMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-ZFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-ZFBFMIN-NEXT:    vse16.v v8, (a1)
-; RV32-ZFBFMIN-NEXT:    lh a0, 0(a0)
+; RV32-ZFBFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; RV32-ZFBFMIN-NEXT:    vmv.x.s a0, v8
 ; RV32-ZFBFMIN-NEXT:    vmv.v.x v8, a0
-; RV32-ZFBFMIN-NEXT:    csrr a0, vlenb
-; RV32-ZFBFMIN-NEXT:    add sp, sp, a0
-; RV32-ZFBFMIN-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZFBFMIN-NEXT:    addi sp, sp, 48
 ; RV32-ZFBFMIN-NEXT:    ret
 ;
 ; RV64-ZFBFMIN-LABEL: splat_idx_v4bf16:
 ; RV64-ZFBFMIN:       # %bb.0:
-; RV64-ZFBFMIN-NEXT:    addi sp, sp, -48
-; RV64-ZFBFMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV64-ZFBFMIN-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZFBFMIN-NEXT:    .cfi_offset ra, -8
-; RV64-ZFBFMIN-NEXT:    csrr a1, vlenb
-; RV64-ZFBFMIN-NEXT:    sub sp, sp, a1
-; RV64-ZFBFMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV64-ZFBFMIN-NEXT:    addi a1, sp, 32
-; RV64-ZFBFMIN-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-ZFBFMIN-NEXT:    andi a0, a0, 3
-; RV64-ZFBFMIN-NEXT:    li a1, 2
-; RV64-ZFBFMIN-NEXT:    call __muldi3
-; RV64-ZFBFMIN-NEXT:    addi a1, sp, 16
-; RV64-ZFBFMIN-NEXT:    add a0, a1, a0
-; RV64-ZFBFMIN-NEXT:    addi a2, sp, 32
-; RV64-ZFBFMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV64-ZFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-ZFBFMIN-NEXT:    vse16.v v8, (a1)
-; RV64-ZFBFMIN-NEXT:    lh a0, 0(a0)
+; RV64-ZFBFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-ZFBFMIN-NEXT:    vmv.x.s a0, v8
 ; RV64-ZFBFMIN-NEXT:    vmv.v.x v8, a0
-; RV64-ZFBFMIN-NEXT:    csrr a0, vlenb
-; RV64-ZFBFMIN-NEXT:    add sp, sp, a0
-; RV64-ZFBFMIN-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZFBFMIN-NEXT:    addi sp, sp, 48
 ; RV64-ZFBFMIN-NEXT:    ret
   %x = extractelement <4 x bfloat> %v, i64 %idx
   %ins = insertelement <4 x bfloat> poison, bfloat %x, i32 0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 4 additions & 44 deletions
@@ -201,58 +201,18 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
 ;
 ; RV32-ZFHMIN-LABEL: splat_idx_v4f16:
 ; RV32-ZFHMIN:       # %bb.0:
-; RV32-ZFHMIN-NEXT:    addi sp, sp, -48
-; RV32-ZFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV32-ZFHMIN-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZFHMIN-NEXT:    .cfi_offset ra, -4
-; RV32-ZFHMIN-NEXT:    csrr a1, vlenb
-; RV32-ZFHMIN-NEXT:    sub sp, sp, a1
-; RV32-ZFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV32-ZFHMIN-NEXT:    addi a1, sp, 32
-; RV32-ZFHMIN-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-ZFHMIN-NEXT:    andi a0, a0, 3
-; RV32-ZFHMIN-NEXT:    li a1, 2
-; RV32-ZFHMIN-NEXT:    call __mulsi3
-; RV32-ZFHMIN-NEXT:    addi a1, sp, 16
-; RV32-ZFHMIN-NEXT:    add a0, a1, a0
-; RV32-ZFHMIN-NEXT:    addi a2, sp, 32
-; RV32-ZFHMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-ZFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-ZFHMIN-NEXT:    vse16.v v8, (a1)
-; RV32-ZFHMIN-NEXT:    lh a0, 0(a0)
+; RV32-ZFHMIN-NEXT:    vslidedown.vx v8, v8, a0
+; RV32-ZFHMIN-NEXT:    vmv.x.s a0, v8
 ; RV32-ZFHMIN-NEXT:    vmv.v.x v8, a0
-; RV32-ZFHMIN-NEXT:    csrr a0, vlenb
-; RV32-ZFHMIN-NEXT:    add sp, sp, a0
-; RV32-ZFHMIN-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZFHMIN-NEXT:    addi sp, sp, 48
 ; RV32-ZFHMIN-NEXT:    ret
 ;
 ; RV64-ZFHMIN-LABEL: splat_idx_v4f16:
 ; RV64-ZFHMIN:       # %bb.0:
-; RV64-ZFHMIN-NEXT:    addi sp, sp, -48
-; RV64-ZFHMIN-NEXT:    .cfi_def_cfa_offset 48
-; RV64-ZFHMIN-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZFHMIN-NEXT:    .cfi_offset ra, -8
-; RV64-ZFHMIN-NEXT:    csrr a1, vlenb
-; RV64-ZFHMIN-NEXT:    sub sp, sp, a1
-; RV64-ZFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV64-ZFHMIN-NEXT:    addi a1, sp, 32
-; RV64-ZFHMIN-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-ZFHMIN-NEXT:    andi a0, a0, 3
-; RV64-ZFHMIN-NEXT:    li a1, 2
-; RV64-ZFHMIN-NEXT:    call __muldi3
-; RV64-ZFHMIN-NEXT:    addi a1, sp, 16
-; RV64-ZFHMIN-NEXT:    add a0, a1, a0
-; RV64-ZFHMIN-NEXT:    addi a2, sp, 32
-; RV64-ZFHMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV64-ZFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-ZFHMIN-NEXT:    vse16.v v8, (a1)
-; RV64-ZFHMIN-NEXT:    lh a0, 0(a0)
+; RV64-ZFHMIN-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-ZFHMIN-NEXT:    vmv.x.s a0, v8
 ; RV64-ZFHMIN-NEXT:    vmv.v.x v8, a0
-; RV64-ZFHMIN-NEXT:    csrr a0, vlenb
-; RV64-ZFHMIN-NEXT:    add sp, sp, a0
-; RV64-ZFHMIN-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZFHMIN-NEXT:    addi sp, sp, 48
 ; RV64-ZFHMIN-NEXT:    ret
   %x = extractelement <4 x half> %v, i64 %idx
   %ins = insertelement <4 x half> poison, half %x, i32 0
