Skip to content

Commit ab393ce

Browse files
authored
[RISCV] Take known minimum vlen into account when calculating alignment padding in assignRVVStackObjectOffsets. (#110312)
If we know vlen is a multiple of 16, we don't need any alignment padding. The code is written so that it would still generate the minimum amount of padding if the stack alignment were 32 or larger, or if RVVBitsPerBlock were smaller than half the stack alignment.
1 parent 18fa9fa commit ab393ce

34 files changed: 473 additions (+473) and 491 deletions (-491).

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1102,16 +1102,25 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
11021102
RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
11031103
}
11041104

1105+
uint64_t StackSize = Offset;
1106+
1107+
// Multiply by vscale.
1108+
if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
1109+
StackSize *= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
1110+
11051111
// Ensure the alignment of the RVV stack. Since we want the most-aligned
11061112
// object right at the bottom (i.e., any padding at the top of the frame),
11071113
// readjust all RVV objects down by the alignment padding.
1108-
uint64_t StackSize = Offset;
11091114
if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
11101115
StackSize += AlignmentPadding;
11111116
for (int FI : ObjectsToAllocate)
11121117
MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
11131118
}
11141119

1120+
// Remove vscale.
1121+
if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
1122+
StackSize /= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
1123+
11151124
return std::make_pair(StackSize, RVVStackAlign);
11161125
}
11171126

llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,10 @@ define void @_Z3foov() {
1717
; CHECK-NEXT: addi sp, sp, -16
1818
; CHECK-NEXT: .cfi_def_cfa_offset 16
1919
; CHECK-NEXT: csrr a0, vlenb
20-
; CHECK-NEXT: li a1, 10
21-
; CHECK-NEXT: mul a0, a0, a1
20+
; CHECK-NEXT: slli a1, a0, 3
21+
; CHECK-NEXT: add a0, a1, a0
2222
; CHECK-NEXT: sub sp, sp, a0
23-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
23+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
2424
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
2525
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
2626
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
@@ -83,8 +83,8 @@ define void @_Z3foov() {
8383
; CHECK-NEXT: addi a0, a0, %lo(var_47)
8484
; CHECK-NEXT: vsseg4e16.v v8, (a0)
8585
; CHECK-NEXT: csrr a0, vlenb
86-
; CHECK-NEXT: li a1, 10
87-
; CHECK-NEXT: mul a0, a0, a1
86+
; CHECK-NEXT: slli a1, a0, 3
87+
; CHECK-NEXT: add a0, a1, a0
8888
; CHECK-NEXT: add sp, sp, a0
8989
; CHECK-NEXT: addi sp, sp, 16
9090
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv-cfi-info.ll

Lines changed: 29 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,10 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
1010
; OMIT-FP-NEXT: addi sp, sp, -16
1111
; OMIT-FP-NEXT: .cfi_def_cfa_offset 16
1212
; OMIT-FP-NEXT: csrr a0, vlenb
13-
; OMIT-FP-NEXT: slli a0, a0, 3
13+
; OMIT-FP-NEXT: slli a1, a0, 3
14+
; OMIT-FP-NEXT: sub a0, a1, a0
1415
; OMIT-FP-NEXT: sub sp, sp, a0
15-
; OMIT-FP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
16+
; OMIT-FP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x07, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 7 * vlenb
1617
; OMIT-FP-NEXT: csrr a0, vlenb
1718
; OMIT-FP-NEXT: li a1, 6
1819
; OMIT-FP-NEXT: mul a0, a0, a1
@@ -26,13 +27,13 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
2627
; OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
2728
; OMIT-FP-NEXT: addi a0, sp, 16
2829
; OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill
29-
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x08, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 2 * vlenb
30-
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 4 * vlenb
31-
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x08, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 3 * vlenb
32-
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 8 * vlenb
33-
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x08, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 7 * vlenb
34-
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x08, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 6 * vlenb
35-
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x08, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 5 * vlenb
30+
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x08, 0x11, 0x7f, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 1 * vlenb
31+
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 3 * vlenb
32+
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x08, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 2 * vlenb
33+
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 7 * vlenb
34+
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x08, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 6 * vlenb
35+
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x08, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 5 * vlenb
36+
; OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 4 * vlenb
3637
; OMIT-FP-NEXT: #APP
3738
; OMIT-FP-NEXT: #NO_APP
3839
; OMIT-FP-NEXT: csrr a0, vlenb
@@ -49,7 +50,8 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
4950
; OMIT-FP-NEXT: addi a0, sp, 16
5051
; OMIT-FP-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
5152
; OMIT-FP-NEXT: csrr a0, vlenb
52-
; OMIT-FP-NEXT: slli a0, a0, 3
53+
; OMIT-FP-NEXT: slli a1, a0, 3
54+
; OMIT-FP-NEXT: sub a0, a1, a0
5355
; OMIT-FP-NEXT: add sp, sp, a0
5456
; OMIT-FP-NEXT: addi sp, sp, 16
5557
; OMIT-FP-NEXT: ret
@@ -65,44 +67,47 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
6567
; NO-OMIT-FP-NEXT: addi s0, sp, 32
6668
; NO-OMIT-FP-NEXT: .cfi_def_cfa s0, 0
6769
; NO-OMIT-FP-NEXT: csrr a0, vlenb
68-
; NO-OMIT-FP-NEXT: slli a0, a0, 3
70+
; NO-OMIT-FP-NEXT: slli a1, a0, 3
71+
; NO-OMIT-FP-NEXT: sub a0, a1, a0
6972
; NO-OMIT-FP-NEXT: sub sp, sp, a0
7073
; NO-OMIT-FP-NEXT: csrr a0, vlenb
71-
; NO-OMIT-FP-NEXT: slli a0, a0, 1
7274
; NO-OMIT-FP-NEXT: sub a0, s0, a0
7375
; NO-OMIT-FP-NEXT: addi a0, a0, -32
7476
; NO-OMIT-FP-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
7577
; NO-OMIT-FP-NEXT: csrr a0, vlenb
76-
; NO-OMIT-FP-NEXT: slli a0, a0, 2
78+
; NO-OMIT-FP-NEXT: slli a1, a0, 1
79+
; NO-OMIT-FP-NEXT: add a0, a1, a0
7780
; NO-OMIT-FP-NEXT: sub a0, s0, a0
7881
; NO-OMIT-FP-NEXT: addi a0, a0, -32
7982
; NO-OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
8083
; NO-OMIT-FP-NEXT: csrr a0, vlenb
81-
; NO-OMIT-FP-NEXT: slli a0, a0, 3
84+
; NO-OMIT-FP-NEXT: slli a1, a0, 3
85+
; NO-OMIT-FP-NEXT: sub a0, a1, a0
8286
; NO-OMIT-FP-NEXT: sub a0, s0, a0
8387
; NO-OMIT-FP-NEXT: addi a0, a0, -32
8488
; NO-OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill
85-
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 32 - 2 * vlenb
86-
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 32 - 4 * vlenb
87-
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 32 - 3 * vlenb
88-
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 32 - 8 * vlenb
89-
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 32 - 7 * vlenb
90-
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 32 - 6 * vlenb
91-
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 32 - 5 * vlenb
89+
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7f, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 32 - 1 * vlenb
90+
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 32 - 3 * vlenb
91+
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 32 - 2 * vlenb
92+
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 32 - 7 * vlenb
93+
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 32 - 6 * vlenb
94+
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 32 - 5 * vlenb
95+
; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 32 - 4 * vlenb
9296
; NO-OMIT-FP-NEXT: #APP
9397
; NO-OMIT-FP-NEXT: #NO_APP
9498
; NO-OMIT-FP-NEXT: csrr a0, vlenb
95-
; NO-OMIT-FP-NEXT: slli a0, a0, 1
9699
; NO-OMIT-FP-NEXT: sub a0, s0, a0
97100
; NO-OMIT-FP-NEXT: addi a0, a0, -32
98101
; NO-OMIT-FP-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
99102
; NO-OMIT-FP-NEXT: csrr a0, vlenb
100-
; NO-OMIT-FP-NEXT: slli a0, a0, 2
103+
; NO-OMIT-FP-NEXT: slli a1, a0, 1
104+
; NO-OMIT-FP-NEXT: add a0, a1, a0
101105
; NO-OMIT-FP-NEXT: sub a0, s0, a0
102106
; NO-OMIT-FP-NEXT: addi a0, a0, -32
103107
; NO-OMIT-FP-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload
104108
; NO-OMIT-FP-NEXT: csrr a0, vlenb
105-
; NO-OMIT-FP-NEXT: slli a0, a0, 3
109+
; NO-OMIT-FP-NEXT: slli a1, a0, 3
110+
; NO-OMIT-FP-NEXT: sub a0, a1, a0
106111
; NO-OMIT-FP-NEXT: sub a0, s0, a0
107112
; NO-OMIT-FP-NEXT: addi a0, a0, -32
108113
; NO-OMIT-FP-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload

llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,8 @@ define <vscale x 1 x i64> @access_fixed_and_vector_objects(ptr %val) {
3333
; RV64IV-NEXT: addi sp, sp, -528
3434
; RV64IV-NEXT: .cfi_def_cfa_offset 528
3535
; RV64IV-NEXT: csrr a0, vlenb
36-
; RV64IV-NEXT: slli a0, a0, 1
3736
; RV64IV-NEXT: sub sp, sp, a0
38-
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 2 * vlenb
37+
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 1 * vlenb
3938
; RV64IV-NEXT: addi a0, sp, 8
4039
; RV64IV-NEXT: vl1re64.v v8, (a0)
4140
; RV64IV-NEXT: addi a0, sp, 528
@@ -44,7 +43,6 @@ define <vscale x 1 x i64> @access_fixed_and_vector_objects(ptr %val) {
4443
; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, ma
4544
; RV64IV-NEXT: vadd.vv v8, v8, v9
4645
; RV64IV-NEXT: csrr a0, vlenb
47-
; RV64IV-NEXT: slli a0, a0, 1
4846
; RV64IV-NEXT: add sp, sp, a0
4947
; RV64IV-NEXT: addi sp, sp, 528
5048
; RV64IV-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,12 +38,10 @@ body: |
3838
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0
3939
; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -240
4040
; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB
41-
; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1
4241
; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
4342
; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
4443
; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 undef renamable $v8, killed renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
4544
; CHECK-NEXT: $x10 = PseudoReadVLENB
46-
; CHECK-NEXT: $x10 = SLLI killed $x10, 1
4745
; CHECK-NEXT: $x10 = SUB $x8, killed $x10
4846
; CHECK-NEXT: $x10 = ADDI killed $x10, -2048
4947
; CHECK-NEXT: $x10 = ADDI killed $x10, -224

llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,10 @@ define void @test(ptr %addr) {
1111
; CHECK-NEXT: addi sp, sp, -16
1212
; CHECK-NEXT: .cfi_def_cfa_offset 16
1313
; CHECK-NEXT: csrrs a1, vlenb, zero
14-
; CHECK-NEXT: slli a1, a1, 2
14+
; CHECK-NEXT: slli a2, a1, 1
15+
; CHECK-NEXT: add a1, a2, a1
1516
; CHECK-NEXT: sub sp, sp, a1
16-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
17+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 3 * vlenb
1718
; CHECK-NEXT: csrrs a1, vlenb, zero
1819
; CHECK-NEXT: add a2, a0, a1
1920
; CHECK-NEXT: vl1re64.v v8, (a2)
@@ -28,7 +29,8 @@ define void @test(ptr %addr) {
2829
; CHECK-NEXT: add a0, a0, a1
2930
; CHECK-NEXT: vs1r.v v8, (a0)
3031
; CHECK-NEXT: csrrs a0, vlenb, zero
31-
; CHECK-NEXT: slli a0, a0, 2
32+
; CHECK-NEXT: slli a1, a0, 1
33+
; CHECK-NEXT: add a0, a1, a0
3234
; CHECK-NEXT: add sp, sp, a0
3335
; CHECK-NEXT: addi sp, sp, 16
3436
; CHECK-NEXT: jalr zero, 0(ra)

0 commit comments

Comments
 (0)