Commit 950ee75

[RISC-V] Fix check of minimum vlen. (#114055)
If we have a minimum vlen, we were adjusting StackSize to change the unit from vscale to bytes, and then calculating the required padding size for alignment in bytes. However, we then used that padding size as an offset in vscale units, resulting in misplaced stack objects. While it would be possible to adjust the object offsets by dividing AlignmentPadding by ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, we can simplify the calculation a bit if instead we adjust the alignment to be in vscale units.

@topperc This fixes a bug I am seeing after #110312, but I am not 100% certain I am understanding the code correctly. Could you please see if this makes sense to you?
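To make the unit mismatch concrete, here is a small worked example; the numbers are illustrative and not taken from the patch. Assume Zvl256b, so ST.getRealMinVLen() = 256 and RISCV::RVVBitsPerBlock = 64, i.e. a minimum vscale of 4, and assume RVVStackAlign = 64 bytes with an accumulated StackSize of 24 vscale units.

Old code: StackSize becomes 24 * 4 = 96 bytes, and AlignmentPadding = offsetToAlignment(96, 64) = 32 bytes. Those 32 "bytes" were then subtracted from object offsets that are measured in vscale units, moving the objects by 32 * vscale bytes at runtime (128 bytes at the minimum VLEN) instead of the intended 32 bytes.

New code: the alignment is first converted to vscale units, 64 / 4 = 16, so AlignmentPadding = offsetToAlignment(24, 16) = 8 vscale units. That is the same unit the object offsets use, and corresponds to 8 * vscale bytes of padding (32 bytes at the minimum VLEN), matching what the byte-domain calculation intended.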
1 parent b510cdb commit 950ee75

2 files changed (+113, -12 lines)

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 12 additions & 12 deletions
@@ -1133,23 +1133,23 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
 
   uint64_t StackSize = Offset;
 
-  // Multiply by vscale.
-  if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
-    StackSize *= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
-
   // Ensure the alignment of the RVV stack. Since we want the most-aligned
   // object right at the bottom (i.e., any padding at the top of the frame),
   // readjust all RVV objects down by the alignment padding.
-  if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
-    StackSize += AlignmentPadding;
-    for (int FI : ObjectsToAllocate)
-      MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
+  // Stack size and offsets are multiples of vscale, stack alignment is in
+  // bytes, we can divide stack alignment by minimum vscale to get a maximum
+  // stack alignment multiple of vscale.
+  auto VScale =
+      std::max<uint64_t>(ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, 1);
+  if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) {
+    if (auto AlignmentPadding =
+            offsetToAlignment(StackSize, Align(RVVStackAlignVScale))) {
+      StackSize += AlignmentPadding;
+      for (int FI : ObjectsToAllocate)
+        MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
+    }
   }
 
-  // Remove vscale.
-  if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
-    StackSize /= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
-
   return std::make_pair(StackSize, RVVStackAlign);
 }
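For readers looking at this hunk out of context, the following is a minimal standalone sketch of the new rounding logic. It uses plain integers in place of LLVM's Align and offsetToAlignment helpers and hypothetical input values; it only models the calculation and is not the actual RISCVFrameLowering code (in the real function the same padding is also subtracted from each RVV object's offset).

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical inputs: a Zvl256b-style minimum VLEN and a 64-byte RVV stack
  // alignment; StackSize is measured in vscale units (bytes per unit of vscale).
  const uint64_t RVVBitsPerBlock = 64;
  const uint64_t RealMinVLen = 256;
  const uint64_t RVVStackAlignBytes = 64;
  uint64_t StackSize = 24;

  // Minimum vscale implied by the minimum VLEN, clamped to at least 1.
  const uint64_t VScale = std::max<uint64_t>(RealMinVLen / RVVBitsPerBlock, 1);

  // Convert the byte alignment into vscale units and pad StackSize up to it.
  // The padding is now in the same units as the RVV object offsets.
  if (const uint64_t AlignVScale = RVVStackAlignBytes / VScale) {
    const uint64_t Padding =
        (AlignVScale - StackSize % AlignVScale) % AlignVScale;
    StackSize += Padding;
    std::printf("padding = %llu, padded StackSize = %llu (vscale units)\n",
                (unsigned long long)Padding, (unsigned long long)StackSize);
  }
  return 0;
}

With these inputs it prints a padding of 8 and a padded StackSize of 32 vscale units, matching the worked example given above for the fixed calculation.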

llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll

Lines changed: 101 additions & 0 deletions
@@ -756,3 +756,104 @@ define void @lmul_8_x9() nounwind {
   %v9 = alloca <vscale x 8 x i64>
   ret void
 }
+
+define void @lmul_16_align() nounwind {
+; NOZBA-LABEL: lmul_16_align:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -144
+; NOZBA-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 144
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 24
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -128
+; NOZBA-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; NOZBA-NEXT: vmv.v.i v8, 0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: add a0, sp, a0
+; NOZBA-NEXT: addi a0, a0, 128
+; NOZBA-NEXT: vs8r.v v8, (a0)
+; NOZBA-NEXT: csrr a1, vlenb
+; NOZBA-NEXT: slli a1, a1, 3
+; NOZBA-NEXT: add a0, a0, a1
+; NOZBA-NEXT: vs8r.v v8, (a0)
+; NOZBA-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; NOZBA-NEXT: vmv.v.i v8, 0
+; NOZBA-NEXT: addi a0, sp, 128
+; NOZBA-NEXT: vs1r.v v8, (a0)
+; NOZBA-NEXT: addi sp, s0, -144
+; NOZBA-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 144
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul_16_align:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -144
+; ZBA-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 144
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 3
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -128
+; ZBA-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; ZBA-NEXT: vmv.v.i v8, 0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: add a0, sp, a0
+; ZBA-NEXT: addi a0, a0, 128
+; ZBA-NEXT: vs8r.v v8, (a0)
+; ZBA-NEXT: csrr a1, vlenb
+; ZBA-NEXT: sh3add a0, a1, a0
+; ZBA-NEXT: vs8r.v v8, (a0)
+; ZBA-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZBA-NEXT: vmv.v.i v8, 0
+; ZBA-NEXT: addi a0, sp, 128
+; ZBA-NEXT: vs1r.v v8, (a0)
+; ZBA-NEXT: addi sp, s0, -144
+; ZBA-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 144
+; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul_16_align:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: addi sp, sp, -144
+; NOMUL-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: addi s0, sp, 144
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 3
+; NOMUL-NEXT: mv a1, a0
+; NOMUL-NEXT: slli a0, a0, 1
+; NOMUL-NEXT: add a0, a0, a1
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: andi sp, sp, -128
+; NOMUL-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; NOMUL-NEXT: vmv.v.i v8, 0
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: add a0, sp, a0
+; NOMUL-NEXT: addi a0, a0, 128
+; NOMUL-NEXT: vs8r.v v8, (a0)
+; NOMUL-NEXT: csrr a1, vlenb
+; NOMUL-NEXT: slli a1, a1, 3
+; NOMUL-NEXT: add a0, a0, a1
+; NOMUL-NEXT: vs8r.v v8, (a0)
+; NOMUL-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; NOMUL-NEXT: vmv.v.i v8, 0
+; NOMUL-NEXT: addi a0, sp, 128
+; NOMUL-NEXT: vs1r.v v8, (a0)
+; NOMUL-NEXT: addi sp, s0, -144
+; NOMUL-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: addi sp, sp, 144
+; NOMUL-NEXT: ret
+  %v1 = alloca <vscale x 16 x i64>
+  %v2 = alloca <vscale x 1 x i64>
+  store <vscale x 16 x i64> zeroinitializer, ptr %v1
+  store <vscale x 1 x i64> zeroinitializer, ptr %v2
+  ret void
+}
