Commit 63b534b

[RISCV] Fold vmv.x.s into load from stack (#109774)

If a vector is reloaded from the stack to be used in a vmv.x.s, we can tell foldMemoryOperandImpl to fold it into a scalar load. If XLEN < SEW, this currently just bails, since I couldn't think of a way to express a vmv.x.s that truncates in LLVM IR.
1 parent 8ea0dba commit 63b534b
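
For illustration: in the RV64 i64 test added below, extracting element 0 of a spilled vector would otherwise need a vector reload followed by a vmv.x.s, roughly (modeled on the RV32 path in the same test, which still bails):

    vl1r.v v9, (a1)    # Unknown-size Folded Reload
    vmv.x.s a0, v9

With this fold, the pair becomes a single scalar load from the spill slot:

    ld a0, 16(sp)      # 8-byte Folded Reload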

2 files changed, +185 -0 lines changed


llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+23 lines)
@@ -761,6 +761,29 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
       LoadOpc = RISCV::LBU;
       break;
     }
+    if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) {
+      unsigned Log2SEW =
+          MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+      if (STI.getXLen() < (1 << Log2SEW))
+        return nullptr;
+      switch (Log2SEW) {
+      case 3:
+        LoadOpc = RISCV::LB;
+        break;
+      case 4:
+        LoadOpc = RISCV::LH;
+        break;
+      case 5:
+        LoadOpc = RISCV::LW;
+        break;
+      case 6:
+        LoadOpc = RISCV::LD;
+        break;
+      default:
+        llvm_unreachable("Unexpected SEW");
+      }
+      break;
+    }
     return nullptr;
   case RISCV::SEXT_H:
     LoadOpc = RISCV::LH;
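
A note on the mapping: Log2SEW is log2 of the element width in bits, so values 3/4/5/6 correspond to 8-, 16-, 32- and 64-bit elements and select LB/LH/LW/LD. The sign-extending load variants are the right choice because vmv.x.s sign-extends element 0 to XLEN; the XLEN < SEW guard rejects cases like a 64-bit element on RV32, where a single scalar load cannot produce the value.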
New test file (+162 lines):

@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s
+
+define i64 @i64(<vscale x 1 x i64> %v, i1 %c) {
+; RV32-LABEL: i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    #APP
+; RV32-NEXT:    #NO_APP
+; RV32-NEXT:    beqz a0, .LBB0_2
+; RV32-NEXT:  # %bb.1: # %truebb
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vl1r.v v9, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vx v8, v9, a0
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    vmv.x.s a0, v9
+; RV32-NEXT:    j .LBB0_3
+; RV32-NEXT:  .LBB0_2: # %falsebb
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:  .LBB0_3: # %falsebb
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 1
+; RV32-NEXT:    add sp, sp, a2
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    andi a0, a0, 1
+; RV64-NEXT:    #APP
+; RV64-NEXT:    #NO_APP
+; RV64-NEXT:    beqz a0, .LBB0_2
+; RV64-NEXT:  # %bb.1: # %truebb
+; RV64-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:  .LBB0_2: # %falsebb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add sp, sp, a1
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 1 x i64> %v, i32 0
+  ret i64 %x
+falsebb:
+  ret i64 0
+}
+
+define i32 @i32(<vscale x 2 x i32> %v, i1 %c) {
+; CHECK-LABEL: i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beqz a0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %truebb
+; CHECK-NEXT:    lw a0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:  .LBB1_2: # %falsebb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add sp, sp, a1
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 2 x i32> %v, i32 0
+  ret i32 %x
+falsebb:
+  ret i32 0
+}
+
+define i16 @i16(<vscale x 4 x i16> %v, i1 %c) {
+; CHECK-LABEL: i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beqz a0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %truebb
+; CHECK-NEXT:    lh a0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:  .LBB2_2: # %falsebb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add sp, sp, a1
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 4 x i16> %v, i32 0
+  ret i16 %x
+falsebb:
+  ret i16 0
+}
+
+define i8 @i8(<vscale x 8 x i8> %v, i1 %c) {
+; CHECK-LABEL: i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beqz a0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %truebb
+; CHECK-NEXT:    lb a0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:  .LBB3_2: # %falsebb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add sp, sp, a1
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 8 x i8> %v, i32 0
+  ret i8 %x
+falsebb:
+  ret i8 0
+}
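
Note how the RV32 i64 test exercises the bail path: with SEW (64) exceeding XLEN (32), the fold is skipped, so the element still goes through a vector reload (vl1r.v), a vsrl.vx by 32 to reach the upper half, and two vmv.x.s instructions to assemble the i64 result in a1:a0. Every other case folds down to a single lb/lh/lw/ld from the spill slot.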
