
Commit 22f9874

[llvm][RISCV] Support RISCV vector tuple CodeGen and Calling Convention (#97995)
This patch handles target lowering and the calling convention. For target lowering, a vector tuple type, previously represented as multiple scalable vectors, is now lowered to a single `MVT`, and each such `MVT` has a corresponding register class. Loads and stores of vector tuples are handled the same way as before, but additional vector insert/extract instructions are needed to access the sub-register groups. The inline assembly constraint for vector tuple types can be modeled directly as "vr", identical to normal vector registers. For the calling convention, an alternative register-allocation algorithm is no longer needed, which makes the code easier to maintain and read. Stacked on #97994
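As a condensed sketch of the new IR surface (adapted from the updated tests in this commit; the function name @build_and_store_seg4 and its arguments are illustrative), a segment store now takes a single vector tuple value assembled with the llvm.riscv.tuple.insert intrinsic, rather than one operand per segment:

declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), <vscale x 8 x i16>, i32)
declare void @llvm.riscv.vsseg4.nxv8i16.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, i64, i64)

define void @build_and_store_seg4(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, ptr %p, i64 %vl) {
  ; Pack the four LMUL=2 segments (NF=4) into one vector tuple value.
  %t0 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) poison, <vscale x 8 x i16> %a, i32 0)
  %t1 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %t0, <vscale x 8 x i16> %b, i32 1)
  %t2 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %t1, <vscale x 8 x i16> %c, i32 2)
  %t3 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %t2, <vscale x 8 x i16> %d, i32 3)
  ; The segment store takes the tuple, the base pointer, the VL, and a trailing
  ; element-width operand (i64 4 for e16 elements, as in the updated tests below).
  tail call void @llvm.riscv.vsseg4.nxv8i16.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %t3, ptr %p, i64 %vl, i64 4)
  ret void
}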
1 parent db67a66 commit 22f9874

36 files changed (+101700, -107242 lines changed)

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 129 additions & 146 deletions
Large diffs are not rendered by default.

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 5 additions & 5 deletions
@@ -153,11 +153,11 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
                                 SmallVectorImpl<SDValue> &Operands,
                                 bool IsLoad = false, MVT *IndexVT = nullptr);
 
-  void selectVLSEG(SDNode *Node, bool IsMasked, bool IsStrided);
-  void selectVLSEGFF(SDNode *Node, bool IsMasked);
-  void selectVLXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
-  void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
-  void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
+  void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided);
+  void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked);
+  void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered);
+  void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided);
+  void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered);
 
   void selectVSETVLI(SDNode *Node);
 

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 427 additions & 41 deletions
Large diffs are not rendered by default.

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 4 additions & 0 deletions
@@ -441,6 +441,10 @@ enum NodeType : unsigned {
   SF_VC_V_VVW_SE,
   SF_VC_V_FVW_SE,
 
+  // RISC-V vector tuple type version of INSERT_SUBVECTOR/EXTRACT_SUBVECTOR.
+  TUPLE_INSERT,
+  TUPLE_EXTRACT,
+
   // FP to 32 bit int conversions for RV64. These are used to keep track of the
   // result being sign extended to 64 bit. These saturate out of range inputs.
   STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,

llvm/lib/Target/RISCV/RISCVRegisterInfo.td

Lines changed: 27 additions & 2 deletions
@@ -616,15 +616,40 @@ def GPRPair : RISCVRegisterClass<[XLenPairFVT], 64, (add
 // The register class is added for inline assembly for vector mask types.
 def VM : VReg<VMaskVTs, (add VR), 1>;
 
+defvar VTupM1N2VTs = [riscv_nxv8i8x2, riscv_nxv4i8x2, riscv_nxv2i8x2, riscv_nxv1i8x2];
+defvar VTupM1N3VTs = [riscv_nxv8i8x3, riscv_nxv4i8x3, riscv_nxv2i8x3, riscv_nxv1i8x3];
+defvar VTupM1N4VTs = [riscv_nxv8i8x4, riscv_nxv4i8x4, riscv_nxv2i8x4, riscv_nxv1i8x4];
+defvar VTupM1N5VTs = [riscv_nxv8i8x5, riscv_nxv4i8x5, riscv_nxv2i8x5, riscv_nxv1i8x5];
+defvar VTupM1N6VTs = [riscv_nxv8i8x6, riscv_nxv4i8x6, riscv_nxv2i8x6, riscv_nxv1i8x6];
+defvar VTupM1N7VTs = [riscv_nxv8i8x7, riscv_nxv4i8x7, riscv_nxv2i8x7, riscv_nxv1i8x7];
+defvar VTupM1N8VTs = [riscv_nxv8i8x8, riscv_nxv4i8x8, riscv_nxv2i8x8, riscv_nxv1i8x8];
+defvar VTupM2N2VTs = [riscv_nxv16i8x2];
+defvar VTupM2N3VTs = [riscv_nxv16i8x3];
+defvar VTupM2N4VTs = [riscv_nxv16i8x4];
+defvar VTupM4N2VTs = [riscv_nxv32i8x2];
+class VTupRegList<int LMUL, int NF> {
+  list<ValueType> L = !cond(!and(!eq(LMUL, 1), !eq(NF, 2)): VTupM1N2VTs,
+                            !and(!eq(LMUL, 1), !eq(NF, 3)): VTupM1N3VTs,
+                            !and(!eq(LMUL, 1), !eq(NF, 4)): VTupM1N4VTs,
+                            !and(!eq(LMUL, 1), !eq(NF, 5)): VTupM1N5VTs,
+                            !and(!eq(LMUL, 1), !eq(NF, 6)): VTupM1N6VTs,
+                            !and(!eq(LMUL, 1), !eq(NF, 7)): VTupM1N7VTs,
+                            !and(!eq(LMUL, 1), !eq(NF, 8)): VTupM1N8VTs,
+                            !and(!eq(LMUL, 2), !eq(NF, 2)): VTupM2N2VTs,
+                            !and(!eq(LMUL, 2), !eq(NF, 3)): VTupM2N3VTs,
+                            !and(!eq(LMUL, 2), !eq(NF, 4)): VTupM2N4VTs,
+                            !and(!eq(LMUL, 4), !eq(NF, 2)): VTupM4N2VTs);
+}
+
 foreach m = LMULList in {
   foreach nf = NFList<m>.L in {
     let NF = nf in {
       def "VRN" # nf # "M" # m # "NoV0"
-          : VReg<[untyped],
+          : VReg<VTupRegList<m, nf>.L,
                  (add !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0")),
                  m>;
       def "VRN" # nf # "M" # m
-          : VReg<[untyped],
+          : VReg<VTupRegList<m, nf>.L,
                  (add !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0"),
                       !cast<RegisterTuples>("VN" # nf # "M" # m # "V0")),
                  m>;

llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll

Lines changed: 25 additions & 18 deletions
@@ -55,32 +55,33 @@ define void @_Z3foov() {
 ; CHECK-NEXT:    addi a0, a0, %lo(.L__const._Z3foov.var_40)
 ; CHECK-NEXT:    vsetivli zero, 2, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    lui a0, %hi(.L__const._Z3foov.var_44)
-; CHECK-NEXT:    addi a0, a0, %lo(.L__const._Z3foov.var_44)
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    vl2r.v v10, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    vl2r.v v12, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    vl2r.v v14, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    vl2r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    vle16.v v16, (a0)
 ; CHECK-NEXT:    lui a0, 1048572
 ; CHECK-NEXT:    addi a0, a0, 928
 ; CHECK-NEXT:    vmsbc.vx v0, v8, a0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vl2r.v v12, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vl2r.v v14, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vl1r.v v14, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, mu
-; CHECK-NEXT:    vsext.vf2 v10, v8, v0.t
+; CHECK-NEXT:    vsext.vf2 v8, v14, v0.t
+; CHECK-NEXT:    lui a0, %hi(.L__const._Z3foov.var_44)
+; CHECK-NEXT:    addi a0, a0, %lo(.L__const._Z3foov.var_44)
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v14, (a0)
 ; CHECK-NEXT:    lui a0, %hi(var_47)
 ; CHECK-NEXT:    addi a0, a0, %lo(var_47)
-; CHECK-NEXT:    vsseg4e16.v v10, (a0)
+; CHECK-NEXT:    vsseg4e16.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    li a1, 10
 ; CHECK-NEXT:    mul a0, a0, a1
@@ -100,7 +101,11 @@ entry:
   %8 = tail call <vscale x 8 x i1> @llvm.riscv.vmsbc.nxv8i16.i16.i64(<vscale x 8 x i16> %6, i16 -15456, i64 2)
   %9 = tail call i64 @llvm.riscv.vsetvli.i64(i64 2, i64 1, i64 1)
   %10 = tail call <vscale x 8 x i16> @llvm.riscv.vsext.mask.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %8, i64 2, i64 0)
-  tail call void @llvm.riscv.vsseg4.nxv8i16.i64(<vscale x 8 x i16> %10, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, <vscale x 8 x i16> %4, ptr nonnull @var_47, i64 2)
+  %v_0 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) poison, <vscale x 8 x i16> %10, i32 0)
+  %v_1 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %v_0, <vscale x 8 x i16> %2, i32 1)
+  %v_2 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %v_1, <vscale x 8 x i16> %3, i32 2)
+  %v_3 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %v_2, <vscale x 8 x i16> %4, i32 3)
+  tail call void @llvm.riscv.vsseg4.nxv8i16.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %v_3, ptr nonnull @var_47, i64 2, i64 4)
   ret void
 }
@@ -114,4 +119,6 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsbc.nxv8i16.i16.i64(<vscale x 8 x i16>,
 
 declare <vscale x 8 x i16> @llvm.riscv.vsext.mask.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>, <vscale x 8 x i8>, <vscale x 8 x i1>, i64, i64 immarg)
 
-declare void @llvm.riscv.vsseg4.nxv8i16.i64(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, ptr nocapture, i64)
+declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_4t.nxv8i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), <vscale x 8 x i16>, i32)
+
+declare void @llvm.riscv.vsseg4.nxv8i16.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr nocapture, i64, i64)

llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll

Lines changed: 4 additions & 3 deletions
@@ -123,8 +123,8 @@ define void @last_chance_recoloring_failure() {
 ; SUBREGLIVENESS-NEXT:    addi sp, sp, 32
 ; SUBREGLIVENESS-NEXT:    ret
 entry:
-  %i = call { <vscale x 16 x half>, <vscale x 16 x half>} @llvm.riscv.vloxseg2.nxv16f16.nxv16i32.i64( <vscale x 16 x half> undef, <vscale x 16 x half> undef, ptr nonnull poison, <vscale x 16 x i32> poison, i64 55)
-  %i1 = extractvalue { <vscale x 16 x half>, <vscale x 16 x half> } %i, 0
+  %i = call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.nxv16f16.nxv16i32.i64(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr nonnull poison, <vscale x 16 x i32> poison, i64 55, i64 4)
+  %i1 = tail call <vscale x 16 x half> @llvm.riscv.tuple.extract.v16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %i, i32 0)
   %i2 = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> zeroinitializer, i64 7, i64 36, i64 0)
   call void @func()
   %i3 = call <vscale x 16 x i16> @llvm.riscv.vrgather.vv.mask.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> poison, <vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i64 32, i64 0)
@@ -136,7 +136,8 @@ entry:
 }
 
 declare void @func()
-declare { <vscale x 16 x half>, <vscale x 16 x half>} @llvm.riscv.vloxseg2.nxv16f16.nxv16i32.i64( <vscale x 16 x half>, <vscale x 16 x half>, ptr nocapture, <vscale x 16 x i32>, i64)
+declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.nxv16f16.nxv16i32.i64(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr nocapture, <vscale x 16 x i32>, i64, i64)
+declare <vscale x 16 x half> @llvm.riscv.tuple.extract.v16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), i32)
 declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float>, <vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i64, i64, i64 immarg)
 declare <vscale x 16 x i16> @llvm.riscv.vrgather.vv.mask.nxv16i16.i64(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i64, i64 immarg)
 declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16.i64(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x half>, i64, i64)
Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+
+target triple = "riscv64-unknown-unknown-elf"
+
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @load_store_m1x5(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %tuple) {
+; CHECK-LABEL: load_store_m1x5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0)
+; CHECK-NEXT:    csrrs a1, vlenb, zero
+; CHECK-NEXT:    add a2, a0, a1
+; CHECK-NEXT:    vs1r.v v9, (a2)
+; CHECK-NEXT:    add a3, a2, a1
+; CHECK-NEXT:    vs1r.v v10, (a3)
+; CHECK-NEXT:    add a4, a3, a1
+; CHECK-NEXT:    vs1r.v v11, (a4)
+; CHECK-NEXT:    add a1, a4, a1
+; CHECK-NEXT:    vs1r.v v12, (a1)
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vl1re8.v v8, (a0)
+; CHECK-NEXT:    vl1re8.v v9, (a2)
+; CHECK-NEXT:    vl1re8.v v10, (a3)
+; CHECK-NEXT:    vl1re8.v v11, (a4)
+; CHECK-NEXT:    vl1re8.v v12, (a1)
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %tuple.addr = alloca target("riscv.vector.tuple", <vscale x 8 x i8>, 5), align 1
+  store target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %tuple, ptr %tuple.addr, align 1
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  %0 = load target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr %tuple.addr, align 1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
+}
+
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @load_store_m2x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %tuple) {
+; CHECK-LABEL: load_store_m2x2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs2r.v v8, (a0)
+; CHECK-NEXT:    csrrs a1, vlenb, zero
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a0, a1
+; CHECK-NEXT:    vs2r.v v10, (a1)
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vl2re8.v v8, (a0)
+; CHECK-NEXT:    vl2re8.v v10, (a1)
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %tuple.addr = alloca target("riscv.vector.tuple", <vscale x 16 x i8>, 2), align 1
+  store target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %tuple, ptr %tuple.addr, align 1
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  %0 = load target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr %tuple.addr, align 1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
+}
+
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @load_store_m4x2(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %tuple) {
+; CHECK-LABEL: load_store_m4x2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs4r.v v8, (a0)
+; CHECK-NEXT:    csrrs a1, vlenb, zero
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a1, a0, a1
+; CHECK-NEXT:    vs4r.v v12, (a1)
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vl4re8.v v8, (a0)
+; CHECK-NEXT:    vl4re8.v v12, (a1)
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %tuple.addr = alloca target("riscv.vector.tuple", <vscale x 32 x i8>, 2), align 1
+  store target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %tuple, ptr %tuple.addr, align 1
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  %0 = load target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr %tuple.addr, align 1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
+}

llvm/test/CodeGen/RISCV/rvv/inline-asm.ll

Lines changed: 58 additions & 0 deletions
@@ -420,3 +420,61 @@ entry:
   %0 = tail call <vscale x 1 x i1> asm "vmand.mm $0, $1, $2", "={v0},{v1},{v2}"(<vscale x 1 x i1> %in, <vscale x 1 x i1> %in2)
   ret <vscale x 1 x i1> %0
 }
+
+define void @test_vector_tuple_type0(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %val, ptr %base) nounwind {
+; CHECK-LABEL: test_vector_tuple_type0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    vsseg3e8.v v8, (a0)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    ret
+entry:
+  tail call void asm "vsseg3e8.v $0, ($1)", "^vr,r"(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %val, ptr %base)
+  ret void
+}
+
+define void @test_vector_tuple_type1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %val, ptr %base) nounwind {
+; CHECK-LABEL: test_vector_tuple_type1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    vsseg3e8.v v8, (a0)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    ret
+entry:
+  tail call void asm "vsseg3e8.v $0, ($1)", "^vr,r"(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %val, ptr %base)
+  ret void
+}
+
+define void @test_vector_tuple_type2(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %val, target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %val2, target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %val3, ptr %base) nounwind {
+; CHECK-LABEL: test_vector_tuple_type2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vl1r.v v23, (a0)
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vl1r.v v24, (a0)
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vl1r.v v25, (a0)
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vl1r.v v26, (a0)
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vl1r.v v27, (a0)
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vl1r.v v28, (a0)
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vl1r.v v29, (a0)
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    vsseg3e8.v v8, (a1)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    vsseg7e8.v v16, (a1)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    vsseg7e8.v v23, (a1)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    ret
+entry:
+  tail call void asm "vsseg3e8.v $0, ($1)", "^vr,r"(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %val, ptr %base)
+  tail call void asm "vsseg7e8.v $0, ($1)", "^vr,r"(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %val2, ptr %base)
+  tail call void asm "vsseg7e8.v $0, ($1)", "^vr,r"(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %val3, ptr %base)
+  ret void
+}

llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll

Lines changed: 3 additions & 4 deletions
@@ -4,16 +4,15 @@
 
 ; This test previously crashed with an error "ran out of registers during register allocation"
 
-declare void @llvm.riscv.vsseg2.mask.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, ptr, <vscale x 16 x i1>, i32)
+declare void @llvm.riscv.vsseg2.mask.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 16 x i1>, i32, i32)
 
-define void @test_vsseg2_mask_nxv16i16(<vscale x 16 x i16> %val, ptr %base, <vscale x 16 x i1> %mask, i32 %vl) {
+define void @test_vsseg2_mask_nxv16i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %val, ptr %base, <vscale x 16 x i1> %mask, i32 %vl) {
 ; CHECK-LABEL: test_vsseg2_mask_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vsseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
-  tail call void @llvm.riscv.vsseg2.mask.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, ptr %base, <vscale x 16 x i1> %mask, i32 %vl)
+  tail call void @llvm.riscv.vsseg2.mask.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %val, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 4)
   ret void
 }
