diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index b49cbab1876d7..d70903519ecb0 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1133,23 +1133,23 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
   uint64_t StackSize = Offset;
 
-  // Multiply by vscale.
-  if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
-    StackSize *= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
-
   // Ensure the alignment of the RVV stack. Since we want the most-aligned
   // object right at the bottom (i.e., any padding at the top of the frame),
   // readjust all RVV objects down by the alignment padding.
-  if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
-    StackSize += AlignmentPadding;
-    for (int FI : ObjectsToAllocate)
-      MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
+  // Stack size and offsets are multiples of vscale, stack alignment is in
+  // bytes, we can divide stack alignment by minimum vscale to get a maximum
+  // stack alignment multiple of vscale.
+  auto VScale =
+      std::max<uint64_t>(ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, 1);
+  if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) {
+    if (auto AlignmentPadding =
+            offsetToAlignment(StackSize, Align(RVVStackAlignVScale))) {
+      StackSize += AlignmentPadding;
+      for (int FI : ObjectsToAllocate)
+        MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
+    }
   }
 
-  // Remove vscale.
-  if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
-    StackSize /= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
-
   return std::make_pair(StackSize, RVVStackAlign);
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 35e269b911902..43be8feece23c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -756,3 +756,104 @@ define void @lmul_8_x9() nounwind {
   %v9 = alloca <vscale x 8 x i64>
   ret void
 }
+
+define void @lmul_16_align() nounwind {
+; NOZBA-LABEL: lmul_16_align:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    addi sp, sp, -144
+; NOZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; NOZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; NOZBA-NEXT:    addi s0, sp, 144
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    li a1, 24
+; NOZBA-NEXT:    mul a0, a0, a1
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    andi sp, sp, -128
+; NOZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; NOZBA-NEXT:    vmv.v.i v8, 0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    add a0, sp, a0
+; NOZBA-NEXT:    addi a0, a0, 128
+; NOZBA-NEXT:    vs8r.v v8, (a0)
+; NOZBA-NEXT:    csrr a1, vlenb
+; NOZBA-NEXT:    slli a1, a1, 3
+; NOZBA-NEXT:    add a0, a0, a1
+; NOZBA-NEXT:    vs8r.v v8, (a0)
+; NOZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; NOZBA-NEXT:    vmv.v.i v8, 0
+; NOZBA-NEXT:    addi a0, sp, 128
+; NOZBA-NEXT:    vs1r.v v8, (a0)
+; NOZBA-NEXT:    addi sp, s0, -144
+; NOZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; NOZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; NOZBA-NEXT:    addi sp, sp, 144
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: lmul_16_align:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    addi sp, sp, -144
+; ZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; ZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; ZBA-NEXT:    addi s0, sp, 144
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 3
+; ZBA-NEXT:    sh1add a0, a0, a0
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    andi sp, sp, -128
+; ZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; ZBA-NEXT:    vmv.v.i v8, 0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    add a0, sp, a0
+; ZBA-NEXT:    addi a0, a0, 128
+; ZBA-NEXT:    vs8r.v v8, (a0)
+; ZBA-NEXT:    csrr a1, vlenb
+; ZBA-NEXT:    sh3add a0, a1, a0
+; ZBA-NEXT:    vs8r.v v8, (a0)
+; ZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; ZBA-NEXT:    vmv.v.i v8, 0
+; ZBA-NEXT:    addi a0, sp, 128
+; ZBA-NEXT:    vs1r.v v8, (a0)
+; ZBA-NEXT:    addi sp, s0, -144
+; ZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; ZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; ZBA-NEXT:    addi sp, sp, 144
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: lmul_16_align:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    addi sp, sp, -144
+; NOMUL-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; NOMUL-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; NOMUL-NEXT:    addi s0, sp, 144
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 3
+; NOMUL-NEXT:    mv a1, a0
+; NOMUL-NEXT:    slli a0, a0, 1
+; NOMUL-NEXT:    add a0, a0, a1
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    andi sp, sp, -128
+; NOMUL-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; NOMUL-NEXT:    vmv.v.i v8, 0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    add a0, sp, a0
+; NOMUL-NEXT:    addi a0, a0, 128
+; NOMUL-NEXT:    vs8r.v v8, (a0)
+; NOMUL-NEXT:    csrr a1, vlenb
+; NOMUL-NEXT:    slli a1, a1, 3
+; NOMUL-NEXT:    add a0, a0, a1
+; NOMUL-NEXT:    vs8r.v v8, (a0)
+; NOMUL-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; NOMUL-NEXT:    vmv.v.i v8, 0
+; NOMUL-NEXT:    addi a0, sp, 128
+; NOMUL-NEXT:    vs1r.v v8, (a0)
+; NOMUL-NEXT:    addi sp, s0, -144
+; NOMUL-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; NOMUL-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; NOMUL-NEXT:    addi sp, sp, 144
+; NOMUL-NEXT:    ret
+  %v1 = alloca <vscale x 16 x i64>
+  %v2 = alloca <vscale x 1 x i64>
+  store <vscale x 16 x i64> zeroinitializer, ptr %v1
+  store <vscale x 1 x i64> zeroinitializer, ptr %v2
+  ret void
+}
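For reference, the arithmetic the new code performs can be checked standalone with the numbers from the `lmul_16_align` test above. This is a minimal sketch, assuming illustrative values: `RVVBitsPerBlock` = 64, a minimum VLEN of 128, a 128-byte `RVVStackAlign`, and a local `offsetToAlignment` helper standing in for the LLVM one; it is not the in-tree implementation.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

// Illustrative stand-ins for the LLVM-side values (assumptions, not the
// real subtarget/frame-lowering queries).
constexpr uint64_t RVVBitsPerBlock = 64;     // bits per vector block
constexpr uint64_t RealMinVLen = 128;        // assumed minimum VLEN
constexpr uint64_t RVVStackAlignBytes = 128; // RVV stack alignment in bytes

// Bytes to add to Offset so it becomes a multiple of Alignment (mirrors what
// llvm::offsetToAlignment computes).
uint64_t offsetToAlignment(uint64_t Offset, uint64_t Alignment) {
  return (Alignment - (Offset % Alignment)) % Alignment;
}

int main() {
  // RVV stack sizes and offsets are vscale-relative: a <vscale x 16 x i64>
  // object contributes 128 and a <vscale x 1 x i64> object contributes 8,
  // as in the lmul_16_align test.
  uint64_t StackSize = 128 + 8;

  // Minimum vscale implied by the minimum VLEN, clamped to at least 1.
  uint64_t VScale = std::max<uint64_t>(RealMinVLen / RVVBitsPerBlock, 1);

  // Divide the byte alignment by the minimum vscale to express it in the same
  // vscale-relative units as StackSize, then pad the size (the patch also
  // shifts each RVV object down by the same padding).
  if (uint64_t RVVStackAlignVScale = RVVStackAlignBytes / VScale)
    if (uint64_t Padding = offsetToAlignment(StackSize, RVVStackAlignVScale))
      StackSize += Padding;

  // 136 -> 192 vscale-relative bytes, i.e. 24 * vlenb, which matches the
  // "li a1, 24; mul" sequence in the generated test output.
  std::cout << "StackSize = " << StackSize << "\n";
  return 0;
}
```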