[RISCV] Fold vfmv.f.s into load from stack #110129
Conversation
This is the f64/f32 version of llvm#109774. I've left out f16 and bf16 for now because there's a separate issue where we can't select extract_vector_elt when f16/bf16 is a legal type, see llvm#110126.
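For a concrete picture of what the fold does, compare the two truebb blocks in the f64 test below: without the fold we reload the whole spilled vector and then move element 0 out with vfmv.f.s; with the fold we load the element straight from the spill slot. A minimal before/after sketch (the registers and the 16(sp) offset are just what that test happens to use):

Before (this is still what the RV32 run emits for f64, since the fold bails out when XLEN < SEW):
  addi a0, sp, 16
  vl1r.v v8, (a0)                     # reload the whole spilled vector
  vsetivli zero, 1, e64, m1, ta, ma
  vfmv.f.s fa0, v8                    # then move element 0 into an FPR

After (RV64, where XLEN >= SEW):
  fld fa0, 16(sp)                     # single scalar load from the spill slot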
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
Changes: This is the f64/f32 version of #109774.
Full diff: https://github.com/llvm/llvm-project/pull/110129.diff
2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 8dafd824963c09..40863bb30e22d7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -784,6 +784,26 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
}
break;
}
+ if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) {
+ unsigned Log2SEW =
+ MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+ if (STI.getXLen() < (1 << Log2SEW))
+ return nullptr;
+ switch (Log2SEW) {
+ case 4:
+ // TODO: Support f16/bf16
+ return nullptr;
+ case 5:
+ LoadOpc = RISCV::FLW;
+ break;
+ case 6:
+ LoadOpc = RISCV::FLD;
+ break;
+ default:
+ llvm_unreachable("Unexpected SEW");
+ }
+ break;
+ }
return nullptr;
case RISCV::SEXT_H:
LoadOpc = RISCV::LH;
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
index 4771d7fe6ec92b..14b8264cedc4d2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
@@ -160,3 +160,103 @@ truebb:
falsebb:
ret i8 0
}
+
+define double @f64(<vscale x 1 x double> %v, i1 %c) {
+; RV32-LABEL: f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: #APP
+; RV32-NEXT: #NO_APP
+; RV32-NEXT: beqz a0, .LBB4_2
+; RV32-NEXT: # %bb.1: # %truebb
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: j .LBB4_3
+; RV32-NEXT: .LBB4_2: # %falsebb
+; RV32-NEXT: fcvt.d.w fa0, zero
+; RV32-NEXT: .LBB4_3: # %falsebb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: #APP
+; RV64-NEXT: #NO_APP
+; RV64-NEXT: beqz a0, .LBB4_2
+; RV64-NEXT: # %bb.1: # %truebb
+; RV64-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: j .LBB4_3
+; RV64-NEXT: .LBB4_2: # %falsebb
+; RV64-NEXT: fmv.d.x fa0, zero
+; RV64-NEXT: .LBB4_3: # %falsebb
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ br i1 %c, label %truebb, label %falsebb
+truebb:
+ %x = extractelement <vscale x 1 x double> %v, i32 0
+ ret double %x
+falsebb:
+ ret double 0.0
+}
+
+define float @f32(<vscale x 2 x float> %v, i1 %c) {
+; CHECK-LABEL: f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: beqz a0, .LBB5_2
+; CHECK-NEXT: # %bb.1: # %truebb
+; CHECK-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: j .LBB5_3
+; CHECK-NEXT: .LBB5_2: # %falsebb
+; CHECK-NEXT: fmv.w.x fa0, zero
+; CHECK-NEXT: .LBB5_3: # %falsebb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ br i1 %c, label %truebb, label %falsebb
+truebb:
+ %x = extractelement <vscale x 2 x float> %v, i32 0
+ ret float %x
+falsebb:
+ ret float 0.0
+}
+
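(The test's RUN lines aren't part of this hunk, so the exact options are an assumption, but running something along the lines of llc -mtriple=riscv64 -mattr=+v,+d < llvm/test/CodeGen/RISCV/rvv/stack-folding.ll should show the fld/flw reloads in the truebb blocks, while an rv32 triple keeps the vl1r.v + vfmv.f.s sequence for f64.)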
LGTM
LGTM
After llvm#110144, we can finish off llvm#110129 and fold the f16 vfmv.f.s into an flh. vfmv.f.s is only available for f16 with zvfh, which in turn requires zfhmin, so we can use flh. bf16 has no vfmv.f.s, so in llvm#110144 the extract_vector_elt is lowered as an integer instead and picks up the existing integer vmv.x.s fold.
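To illustrate that follow-up, the f16 case would mirror the f64/f32 tests above. A sketch of the expected codegen once it lands, assuming zvfh/zfhmin are enabled (the offset, registers, and LMUL are illustrative, not taken from an actual test):

Before:
  addi a0, sp, 16
  vl1r.v v8, (a0)                     # reload the whole spilled vector
  vsetivli zero, 1, e16, m1, ta, ma
  vfmv.f.s fa0, v8

After:
  flh fa0, 16(sp)                     # flh needs zfhmin, which zvfh already requires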