From 44c90eb2404bfc08bccffc0ced04bcf46f388388 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 26 Sep 2024 22:50:12 +0800 Subject: [PATCH 1/4] Precommit tests --- llvm/test/CodeGen/RISCV/rvv/stack-folding.ll | 106 +++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll index 4771d7fe6ec92..e4e852a295ab5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ -160,3 +160,109 @@ truebb: falsebb: ret i8 0 } + +define double @f64(<vscale x 1 x double> %v, i1 %c) { +; RV32-LABEL: f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: andi a0, a0, 1 +; RV32-NEXT: #APP +; RV32-NEXT: #NO_APP +; RV32-NEXT: beqz a0, .LBB4_2 +; RV32-NEXT: # %bb.1: # %truebb +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: j .LBB4_3 +; RV32-NEXT: .LBB4_2: # %falsebb +; RV32-NEXT: fcvt.d.w fa0, zero +; RV32-NEXT: .LBB4_3: # %falsebb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded 
Spill +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: #APP +; RV64-NEXT: #NO_APP +; RV64-NEXT: beqz a0, .LBB4_2 +; RV64-NEXT: # %bb.1: # %truebb +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: j .LBB4_3 +; RV64-NEXT: .LBB4_2: # %falsebb +; RV64-NEXT: fmv.d.x fa0, zero +; RV64-NEXT: .LBB4_3: # %falsebb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement <vscale x 1 x double> %v, i32 0 + ret double %x +falsebb: + ret double 0.0 +} + +define float @f32(<vscale x 2 x float> %v, i1 %c) { +; CHECK-LABEL: f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: j .LBB5_3 +; CHECK-NEXT: .LBB5_2: # %falsebb +; CHECK-NEXT: fmv.w.x fa0, zero +; CHECK-NEXT: .LBB5_3: # %falsebb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail 
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement <vscale x 2 x float> %v, i32 0 + ret float %x +falsebb: + ret float 0.0 +} + From 157b3cbfe5b93e555869f56557d66e6e931f0629 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 26 Sep 2024 22:51:00 +0800 Subject: [PATCH 2/4] [RISCV] Fold vfmv.f.s into load from stack This is the f64/f32 version of #109774. I've left out f16 and bf16 for now because there's a separate issue where we can't select extract_vector_elt when f16/bf16 is a legal type, see #110126. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 20 ++++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/stack-folding.ll | 10 ++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 8dafd824963c0..40863bb30e22d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -784,6 +784,26 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( } break; } + if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) { + unsigned Log2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + if (STI.getXLen() < (1 << Log2SEW)) + return nullptr; + switch (Log2SEW) { + case 4: + // TODO: Support f16/bf16 + return nullptr; + case 5: + LoadOpc = RISCV::FLW; + break; + case 6: + LoadOpc = RISCV::FLD; + break; + default: + llvm_unreachable("Unexpected SEW"); + } + break; + } return nullptr; case RISCV::SEXT_H: LoadOpc = RISCV::LH; diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll index e4e852a295ab5..14b8264cedc4d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ 
-206,10 +206,7 @@ define double @f64(<vscale x 1 x double> %v, i1 %c) { ; RV64-NEXT: #NO_APP ; RV64-NEXT: beqz a0, .LBB4_2 ; RV64-NEXT: # %bb.1: # %truebb -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload ; RV64-NEXT: j .LBB4_3 ; RV64-NEXT: .LBB4_2: # %falsebb ; RV64-NEXT: fmv.d.x fa0, zero @@ -244,10 +241,7 @@ define float @f32(<vscale x 2 x float> %v, i1 %c) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: beqz a0, .LBB5_2 ; CHECK-NEXT: # %bb.1: # %truebb -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: j .LBB5_3 ; CHECK-NEXT: .LBB5_2: # %falsebb ; CHECK-NEXT: fmv.w.x fa0, zero From 14b033789caec0e3a7e61e39f60d16730c7cc17a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 26 Sep 2024 23:54:22 +0800 Subject: [PATCH 3/4] Remove XLEN check --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 2 -- llvm/test/CodeGen/RISCV/rvv/stack-folding.ll | 5 +---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 40863bb30e22d..10b4e4870aebe 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -787,8 +787,6 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) { unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); - if (STI.getXLen() < (1 << Log2SEW)) - return nullptr; switch (Log2SEW) { case 4: // TODO: Support f16/bf16 diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll index 14b8264cedc4d..f966835622a9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ -177,10 +177,7 @@ define double @f64(<vscale x 1 x double> %v, i1 %c) { ; RV32-NEXT: #NO_APP ; RV32-NEXT: beqz a0, .LBB4_2 ; RV32-NEXT: # %bb.1: # %truebb -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload ; RV32-NEXT: j .LBB4_3 ; RV32-NEXT: .LBB4_2: # %falsebb ; RV32-NEXT: fcvt.d.w fa0, zero From 4f74df7f73b9aa07cb24435f4252be95d440d617 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 27 Sep 2024 07:59:30 +0800 Subject: [PATCH 4/4] Update test --- .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 50 ++++--------------- 1 file changed, 10 insertions(+), 40 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 1395dc914bb40..3c184c112e77a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -2261,10 +2261,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 @@ -2394,10 +2391,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 @@ 
-2506,10 +2500,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv a2, s1 ; CHECK-V-NEXT: blez s1, .LBB20_2 @@ -2668,10 +2659,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 @@ -2801,10 +2789,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 @@ -2913,10 +2898,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv a2, s1 ; CHECK-V-NEXT: blez s1, .LBB23_2 @@ -5597,10 +5579,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> 
%x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 @@ -5831,10 +5810,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv a2, a1 ; CHECK-V-NEXT: blez a1, .LBB47_2 @@ -5983,10 +5959,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 @@ -6217,10 +6190,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv a2, a1 ; CHECK-V-NEXT: blez a1, .LBB50_2