Skip to content

Commit c01406f

Browse files
lukel97 authored and Sterling-Augustine committed
[RISCV] Fold vfmv.f.s of f16 into load from stack (llvm#110214)
After llvm#110144, we can finish off llvm#110129 and fold an f16 vfmv.f.s into an flh. vfmv.f.s is only available for f16 with zvfh, which in turn requires zfhmin, so we can use flh. bf16 has no vfmv.f.s, so the extract_vector_elt is lowered as an integer in llvm#110144 and gets the existing integer vmv.x.s fold.
1 parent 5f4df56 commit c01406f

File tree

2 files changed

+135
-4
lines changed

2 files changed

+135
-4
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -804,8 +804,8 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
804804
MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
805805
switch (Log2SEW) {
806806
case 4:
807-
// TODO: Support f16/bf16
808-
return nullptr;
807+
LoadOpc = RISCV::FLH;
808+
break;
809809
case 5:
810810
LoadOpc = RISCV::FLW;
811811
break;

llvm/test/CodeGen/RISCV/rvv/stack-folding.ll

Lines changed: 133 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s
3-
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32,ZFMIN %s
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64,ZFMIN %s
4+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32,NOZFMIN %s
5+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64,NOZFMIN %s
46

57
define i64 @i64(<vscale x 1 x i64> %v, i1 %c) {
68
; RV32-LABEL: i64:
@@ -241,3 +243,132 @@ falsebb:
241243
ret float 0.0
242244
}
243245

246+
define half @f16(<vscale x 1 x half> %v, i1 %c) {
247+
; ZFMIN-LABEL: f16:
248+
; ZFMIN: # %bb.0:
249+
; ZFMIN-NEXT: addi sp, sp, -16
250+
; ZFMIN-NEXT: .cfi_def_cfa_offset 16
251+
; ZFMIN-NEXT: csrr a1, vlenb
252+
; ZFMIN-NEXT: slli a1, a1, 1
253+
; ZFMIN-NEXT: sub sp, sp, a1
254+
; ZFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
255+
; ZFMIN-NEXT: addi a1, sp, 16
256+
; ZFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
257+
; ZFMIN-NEXT: andi a0, a0, 1
258+
; ZFMIN-NEXT: #APP
259+
; ZFMIN-NEXT: #NO_APP
260+
; ZFMIN-NEXT: beqz a0, .LBB6_2
261+
; ZFMIN-NEXT: # %bb.1: # %truebb
262+
; ZFMIN-NEXT: flh fa0, 16(sp) # 8-byte Folded Reload
263+
; ZFMIN-NEXT: j .LBB6_3
264+
; ZFMIN-NEXT: .LBB6_2: # %falsebb
265+
; ZFMIN-NEXT: fmv.h.x fa0, zero
266+
; ZFMIN-NEXT: .LBB6_3: # %falsebb
267+
; ZFMIN-NEXT: csrr a0, vlenb
268+
; ZFMIN-NEXT: slli a0, a0, 1
269+
; ZFMIN-NEXT: add sp, sp, a0
270+
; ZFMIN-NEXT: addi sp, sp, 16
271+
; ZFMIN-NEXT: ret
272+
;
273+
; NOZFMIN-LABEL: f16:
274+
; NOZFMIN: # %bb.0:
275+
; NOZFMIN-NEXT: addi sp, sp, -16
276+
; NOZFMIN-NEXT: .cfi_def_cfa_offset 16
277+
; NOZFMIN-NEXT: csrr a1, vlenb
278+
; NOZFMIN-NEXT: slli a1, a1, 1
279+
; NOZFMIN-NEXT: sub sp, sp, a1
280+
; NOZFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
281+
; NOZFMIN-NEXT: addi a1, sp, 16
282+
; NOZFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
283+
; NOZFMIN-NEXT: andi a0, a0, 1
284+
; NOZFMIN-NEXT: #APP
285+
; NOZFMIN-NEXT: #NO_APP
286+
; NOZFMIN-NEXT: beqz a0, .LBB6_2
287+
; NOZFMIN-NEXT: # %bb.1: # %truebb
288+
; NOZFMIN-NEXT: lh a0, 16(sp) # 8-byte Folded Reload
289+
; NOZFMIN-NEXT: lui a1, 1048560
290+
; NOZFMIN-NEXT: or a0, a0, a1
291+
; NOZFMIN-NEXT: j .LBB6_3
292+
; NOZFMIN-NEXT: .LBB6_2: # %falsebb
293+
; NOZFMIN-NEXT: lui a0, 1048560
294+
; NOZFMIN-NEXT: .LBB6_3: # %falsebb
295+
; NOZFMIN-NEXT: fmv.w.x fa0, a0
296+
; NOZFMIN-NEXT: csrr a0, vlenb
297+
; NOZFMIN-NEXT: slli a0, a0, 1
298+
; NOZFMIN-NEXT: add sp, sp, a0
299+
; NOZFMIN-NEXT: addi sp, sp, 16
300+
; NOZFMIN-NEXT: ret
301+
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
302+
br i1 %c, label %truebb, label %falsebb
303+
truebb:
304+
%x = extractelement <vscale x 1 x half> %v, i32 0
305+
ret half %x
306+
falsebb:
307+
ret half 0.0
308+
}
309+
310+
define bfloat @bf16(<vscale x 2 x bfloat> %v, i1 %c) {
311+
; ZFMIN-LABEL: bf16:
312+
; ZFMIN: # %bb.0:
313+
; ZFMIN-NEXT: addi sp, sp, -16
314+
; ZFMIN-NEXT: .cfi_def_cfa_offset 16
315+
; ZFMIN-NEXT: csrr a1, vlenb
316+
; ZFMIN-NEXT: slli a1, a1, 1
317+
; ZFMIN-NEXT: sub sp, sp, a1
318+
; ZFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
319+
; ZFMIN-NEXT: addi a1, sp, 16
320+
; ZFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
321+
; ZFMIN-NEXT: andi a0, a0, 1
322+
; ZFMIN-NEXT: #APP
323+
; ZFMIN-NEXT: #NO_APP
324+
; ZFMIN-NEXT: beqz a0, .LBB7_2
325+
; ZFMIN-NEXT: # %bb.1: # %truebb
326+
; ZFMIN-NEXT: lh a0, 16(sp) # 8-byte Folded Reload
327+
; ZFMIN-NEXT: fmv.h.x fa0, a0
328+
; ZFMIN-NEXT: j .LBB7_3
329+
; ZFMIN-NEXT: .LBB7_2: # %falsebb
330+
; ZFMIN-NEXT: fmv.h.x fa0, zero
331+
; ZFMIN-NEXT: .LBB7_3: # %falsebb
332+
; ZFMIN-NEXT: csrr a0, vlenb
333+
; ZFMIN-NEXT: slli a0, a0, 1
334+
; ZFMIN-NEXT: add sp, sp, a0
335+
; ZFMIN-NEXT: addi sp, sp, 16
336+
; ZFMIN-NEXT: ret
337+
;
338+
; NOZFMIN-LABEL: bf16:
339+
; NOZFMIN: # %bb.0:
340+
; NOZFMIN-NEXT: addi sp, sp, -16
341+
; NOZFMIN-NEXT: .cfi_def_cfa_offset 16
342+
; NOZFMIN-NEXT: csrr a1, vlenb
343+
; NOZFMIN-NEXT: slli a1, a1, 1
344+
; NOZFMIN-NEXT: sub sp, sp, a1
345+
; NOZFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
346+
; NOZFMIN-NEXT: addi a1, sp, 16
347+
; NOZFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
348+
; NOZFMIN-NEXT: andi a0, a0, 1
349+
; NOZFMIN-NEXT: #APP
350+
; NOZFMIN-NEXT: #NO_APP
351+
; NOZFMIN-NEXT: beqz a0, .LBB7_2
352+
; NOZFMIN-NEXT: # %bb.1: # %truebb
353+
; NOZFMIN-NEXT: lh a0, 16(sp) # 8-byte Folded Reload
354+
; NOZFMIN-NEXT: lui a1, 1048560
355+
; NOZFMIN-NEXT: or a0, a0, a1
356+
; NOZFMIN-NEXT: j .LBB7_3
357+
; NOZFMIN-NEXT: .LBB7_2: # %falsebb
358+
; NOZFMIN-NEXT: lui a0, 1048560
359+
; NOZFMIN-NEXT: .LBB7_3: # %falsebb
360+
; NOZFMIN-NEXT: fmv.w.x fa0, a0
361+
; NOZFMIN-NEXT: csrr a0, vlenb
362+
; NOZFMIN-NEXT: slli a0, a0, 1
363+
; NOZFMIN-NEXT: add sp, sp, a0
364+
; NOZFMIN-NEXT: addi sp, sp, 16
365+
; NOZFMIN-NEXT: ret
366+
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
367+
br i1 %c, label %truebb, label %falsebb
368+
truebb:
369+
%x = extractelement <vscale x 2 x bfloat> %v, i32 0
370+
ret bfloat %x
371+
falsebb:
372+
ret bfloat 0.0
373+
}
374+

0 commit comments

Comments
 (0)