diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 41f93fde17d32..8210f756f3a24 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -763,6 +763,29 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
       LoadOpc = RISCV::LBU;
       break;
     }
+    if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) {
+      unsigned Log2SEW =
+          MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+      if (STI.getXLen() < (1 << Log2SEW))
+        return nullptr;
+      switch (Log2SEW) {
+      case 3:
+        LoadOpc = RISCV::LB;
+        break;
+      case 4:
+        LoadOpc = RISCV::LH;
+        break;
+      case 5:
+        LoadOpc = RISCV::LW;
+        break;
+      case 6:
+        LoadOpc = RISCV::LD;
+        break;
+      default:
+        llvm_unreachable("Unexpected SEW");
+      }
+      break;
+    }
     return nullptr;
   case RISCV::SEXT_H:
     LoadOpc = RISCV::LH;
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
new file mode 100644
index 0000000000000..4771d7fe6ec92
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s
+
+define i64 @i64(<vscale x 1 x i64> %v, i1 %c) {
+; RV32-LABEL: i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    #APP
+; RV32-NEXT:    #NO_APP
+; RV32-NEXT:    beqz a0, .LBB0_2
+; RV32-NEXT:  # %bb.1: # %truebb
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vl1r.v v9, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vx v8, v9, a0
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    vmv.x.s a0, v9
+; RV32-NEXT:    j .LBB0_3
+; RV32-NEXT:  .LBB0_2: # %falsebb
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:  .LBB0_3: # %falsebb
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 1
+; RV32-NEXT:    add sp, sp, a2
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    andi a0, a0, 1
+; RV64-NEXT:    #APP
+; RV64-NEXT:    #NO_APP
+; RV64-NEXT:    beqz a0, .LBB0_2
+; RV64-NEXT:  # %bb.1: # %truebb
+; RV64-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:  .LBB0_2: # %falsebb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    add sp, sp, a1
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 1 x i64> %v, i32 0
+  ret i64 %x
+falsebb:
+  ret i64 0
+}
+
+define i32 @i32(<vscale x 2 x i32> %v, i1 %c) {
+; CHECK-LABEL: i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beqz a0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %truebb
+; CHECK-NEXT:    lw a0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:  .LBB1_2: # %falsebb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add sp, sp, a1
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 2 x i32> %v, i32 0
+  ret i32 %x
+falsebb:
+  ret i32 0
+}
+
+define i16 @i16(<vscale x 4 x i16> %v, i1 %c) {
+; CHECK-LABEL: i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beqz a0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %truebb
+; CHECK-NEXT:    lh a0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:  .LBB2_2: # %falsebb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add sp, sp, a1
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 4 x i16> %v, i32 0
+  ret i16 %x
+falsebb:
+  ret i16 0
+}
+
+define i8 @i8(<vscale x 8 x i8> %v, i1 %c) {
+; CHECK-LABEL: i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beqz a0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %truebb
+; CHECK-NEXT:    lb a0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:  .LBB3_2: # %falsebb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add sp, sp, a1
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  br i1 %c, label %truebb, label %falsebb
+truebb:
+  %x = extractelement <vscale x 8 x i8> %v, i32 0
+  ret i8 %x
+falsebb:
+  ret i8 0
+}
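
Reviewer note (not part of the patch): the new case in foldMemoryOperandImpl rewrites a vmv.x.s whose source vector was spilled into a plain scalar load from the spill slot, choosing the load width from the instruction's SEW operand and refusing the fold when the element is wider than a scalar register (e.g. e64 on RV32, which is why the riscv32 i64 test still reloads the whole vector). The standalone C++ sketch below mirrors that mapping for illustration only; loadForLog2SEW is a hypothetical helper name, not an LLVM API.

// Sketch of the SEW-to-load-opcode mapping added by this patch.
// Log2SEW is the log2 of the element width, as encoded in the
// instruction's SEW operand; XLen is the scalar register width.
#include <cassert>
#include <cstdio>

const char *loadForLog2SEW(unsigned Log2SEW, unsigned XLen) {
  // An element wider than XLEN cannot be produced by one scalar load,
  // so the fold is rejected (mirrors the STI.getXLen() guard).
  if (XLen < (1u << Log2SEW))
    return nullptr;
  // Sign-extending loads are correct here because vmv.x.s itself
  // sign-extends the element to XLEN bits.
  switch (Log2SEW) {
  case 3: return "lb"; // SEW = 8
  case 4: return "lh"; // SEW = 16
  case 5: return "lw"; // SEW = 32
  case 6: return "ld"; // SEW = 64
  default: assert(false && "Unexpected SEW"); return nullptr;
  }
}

int main() {
  printf("%s\n", loadForLog2SEW(5, 64));    // "lw": e32 element on RV64
  assert(loadForLog2SEW(6, 32) == nullptr); // e64 on RV32: fold rejected
  return 0;
}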