diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 8dafd824963c0..10b4e4870aebe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -784,6 +784,24 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
       }
       break;
     }
+    if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) {
+      unsigned Log2SEW =
+          MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+      switch (Log2SEW) {
+      case 4:
+        // TODO: Support f16/bf16
+        return nullptr;
+      case 5:
+        LoadOpc = RISCV::FLW;
+        break;
+      case 6:
+        LoadOpc = RISCV::FLD;
+        break;
+      default:
+        llvm_unreachable("Unexpected SEW");
+      }
+      break;
+    }
     return nullptr;
   case RISCV::SEXT_H:
     LoadOpc = RISCV::LH;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 1395dc914bb40..3c184c112e77a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -2261,10 +2261,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-V-NEXT:    call __fixdfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    fld fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixdfti
 ; CHECK-V-NEXT:    li a2, -1
 ; CHECK-V-NEXT:    srli a3, a2, 1
@@ -2394,10 +2391,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-V-NEXT:    call __fixunsdfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    fld fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixunsdfti
 ; CHECK-V-NEXT:    snez a1, a1
 ; CHECK-V-NEXT:    snez a2, s1
@@ -2506,10 +2500,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-V-NEXT:    call __fixdfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    fld fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixdfti
 ; CHECK-V-NEXT:    mv a2, s1
 ; CHECK-V-NEXT:    blez s1, .LBB20_2
@@ -2668,10 +2659,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-V-NEXT:    call __fixsfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    flw fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixsfti
 ; CHECK-V-NEXT:    li a2, -1
 ; CHECK-V-NEXT:    srli a3, a2, 1
@@ -2801,10 +2789,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-V-NEXT:    call __fixunssfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    flw fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixunssfti
 ; CHECK-V-NEXT:    snez a1, a1
 ; CHECK-V-NEXT:    snez a2, s1
@@ -2913,10 +2898,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-V-NEXT:    call __fixsfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    flw fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixsfti
 ; CHECK-V-NEXT:    mv a2, s1
 ; CHECK-V-NEXT:    blez s1, .LBB23_2
@@ -5597,10 +5579,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
 ; CHECK-V-NEXT:    call __fixdfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    fld fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixdfti
 ; CHECK-V-NEXT:    li a2, -1
 ; CHECK-V-NEXT:    srli a3, a2, 1
@@ -5831,10 +5810,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-V-NEXT:    call __fixdfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    fld fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixdfti
 ; CHECK-V-NEXT:    mv a2, a1
 ; CHECK-V-NEXT:    blez a1, .LBB47_2
@@ -5983,10 +5959,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK-V-NEXT:    call __fixsfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    flw fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixsfti
 ; CHECK-V-NEXT:    li a2, -1
 ; CHECK-V-NEXT:    srli a3, a2, 1
@@ -6217,10 +6190,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-V-NEXT:    call __fixsfti
 ; CHECK-V-NEXT:    mv s0, a0
 ; CHECK-V-NEXT:    mv s1, a1
-; CHECK-V-NEXT:    addi a0, sp, 32
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT:    vfmv.f.s fa0, v8
+; CHECK-V-NEXT:    flw fa0, 32(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT:    call __fixsfti
 ; CHECK-V-NEXT:    mv a2, a1
 ; CHECK-V-NEXT:    blez a1, .LBB50_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
index 4771d7fe6ec92..f966835622a9f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
@@ -160,3 +160,100 @@ truebb:
 falsebb:
   ret i8 0
 }
+
+define double @f64(<vscale x 1 x double> %v, i1 %c) {
+; RV32-LABEL: f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    #APP
+; RV32-NEXT:    #NO_APP
+; RV32-NEXT:    beqz a0, .LBB4_2
+; RV32-NEXT:  # %bb.1: # %truebb
+; RV32-NEXT:    fld fa0, 16(sp) # 8-byte Folded Reload
+; RV32-NEXT:    j .LBB4_3
+; RV32-NEXT:  .LBB4_2: # %falsebb
+; RV32-NEXT:    fcvt.d.w fa0, zero
+; RV32-NEXT:  .LBB4_3: # %falsebb
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    andi a0, a0, 1
+; RV64-NEXT:    #APP
+; RV64-NEXT:    #NO_APP
+; RV64-NEXT:    beqz a0, .LBB4_2
+; RV64-NEXT:  # %bb.1: # %truebb
+; RV64-NEXT:    fld fa0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    j .LBB4_3
+; RV64-NEXT:  .LBB4_2: # %falsebb
+; RV64-NEXT:    fmv.d.x fa0, zero
+; RV64-NEXT:  .LBB4_3: # %falsebb
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 1 x double> %v, i32 0
+  ret double %x
+falsebb:
+  ret double 0.0
+}
+
+define float @f32(<vscale x 2 x float> %v, i1 %c) {
+; CHECK-LABEL: f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beqz a0, .LBB5_2
+; CHECK-NEXT:  # %bb.1: # %truebb
+; CHECK-NEXT:    flw fa0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    j .LBB5_3
+; CHECK-NEXT:  .LBB5_2: # %falsebb
+; CHECK-NEXT:    fmv.w.x fa0, zero
+; CHECK-NEXT:  .LBB5_3: # %falsebb
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 2 x float> %v, i32 0
+  ret float %x
+falsebb:
+  ret float 0.0
+}