Skip to content

[ARM] Use RegisterClassInfo::getRegPressureSetLimit #120377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 20, 2024

Conversation

wangpc-pp
Copy link
Contributor

RegisterClassInfo::getRegPressureSetLimit is a wrapper of
TargetRegisterInfo::getRegPressureSetLimit with some logics to
adjust the limit by removing reserved registers.

It seems that we shouldn't use TargetRegisterInfo::getRegPressureSetLimit
directly, just like the comment "This limit must be adjusted
dynamically for reserved registers" said.

Separate from #118787

`RegisterClassInfo::getRegPressureSetLimit` is a wrapper of
`TargetRegisterInfo::getRegPressureSetLimit` with some logics to
adjust the limit by removing reserved registers.

It seems that we shouldn't use `TargetRegisterInfo::getRegPressureSetLimit`
directly, just like the comment "This limit must be adjusted
dynamically for reserved registers" said.

Separate from llvm#118787
@llvmbot
Copy link
Member

llvmbot commented Dec 18, 2024

@llvm/pr-subscribers-backend-arm

Author: Pengcheng Wang (wangpc-pp)

Changes

RegisterClassInfo::getRegPressureSetLimit is a wrapper of
TargetRegisterInfo::getRegPressureSetLimit with some logics to
adjust the limit by removing reserved registers.

It seems that we shouldn't use TargetRegisterInfo::getRegPressureSetLimit
directly, just like the comment "This limit must be adjusted
dynamically for reserved registers" said.

Separate from #118787


Full diff: https://github.com/llvm/llvm-project/pull/120377.diff

2 Files Affected:

  • (modified) llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp (+1-1)
  • (modified) llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll (+32-43)
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e6b37dd9161685..891d0c20ebc448 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -6979,7 +6979,7 @@ bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
 
   auto &P = RPTracker.getPressure().MaxSetPressure;
   for (unsigned I = 0, E = P.size(); I < E; ++I)
-    if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
+    if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
       return true;
     }
   return false;
diff --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
index 43ed5eefbf4c77..7d7a65640d3705 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
@@ -17,51 +17,40 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
 ; CHECK-NEXT:    mov.w r5, #0
 ; CHECK-NEXT:    csel r7, r6, r5, hs
 ; CHECK-NEXT:    add.w lr, r7, #1
-; CHECK-NEXT:    mov r4, r5
-; CHECK-NEXT:    vldrh.u16 q0, [r0], #32
+; CHECK-NEXT:    mov r6, r5
+; CHECK-NEXT:    vldrh.u16 q1, [r0], #32
 ; CHECK-NEXT:    movs r7, #0
 ; CHECK-NEXT:    mov r8, r5
-; CHECK-NEXT:    vldrh.u16 q1, [r1], #32
-; CHECK-NEXT:    vmlsldava.s16 r4, r7, q0, q1
-; CHECK-NEXT:    vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT:    vmlaldavax.s16 r8, r5, q0, q1
-; CHECK-NEXT:    vldrh.u16 q3, [r1, #-16]
-; CHECK-NEXT:    vmlsldava.s16 r4, r7, q2, q3
 ; CHECK-NEXT:    vldrh.u16 q0, [r1], #32
-; CHECK-NEXT:    sub.w lr, lr, #1
-; CHECK-NEXT:    cmp.w lr, #0
-; CHECK-NEXT:    vldrh.u16 q1, [r0], #32
-; CHECK-NEXT:    beq .LBB0_3
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  .LBB0_2: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmlaldavax.s16 r8, r5, q2, q3
-; CHECK-NEXT:    vldrh.u16 q3, [r1, #-16]
-; CHECK-NEXT:    vmlsldava.s16 r4, r7, q1, q0
+; CHECK-NEXT:    vmlsldava.s16 r8, r7, q1, q0
 ; CHECK-NEXT:    vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT:    vmlaldavax.s16 r8, r5, q1, q0
-; CHECK-NEXT:    vldrh.u16 q1, [r0], #32
-; CHECK-NEXT:    vmlsldava.s16 r4, r7, q2, q3
+; CHECK-NEXT:    vmlaldavax.s16 r6, r5, q1, q0
+; CHECK-NEXT:    vldrh.u16 q1, [r1, #-16]
+; CHECK-NEXT:    vmlsldava.s16 r8, r7, q2, q1
 ; CHECK-NEXT:    vldrh.u16 q0, [r1], #32
+; CHECK-NEXT:    vmlaldavax.s16 r6, r5, q2, q1
+; CHECK-NEXT:    vldrh.u16 q1, [r0], #32
 ; CHECK-NEXT:    le lr, .LBB0_2
-; CHECK-NEXT:  .LBB0_3:
-; CHECK-NEXT:    vmlaldavax.s16 r8, r5, q2, q3
-; CHECK-NEXT:    movs r6, #14
-; CHECK-NEXT:    and.w r2, r6, r2, lsl #1
-; CHECK-NEXT:    vmlaldavax.s16 r8, r5, q1, q0
+; CHECK-NEXT:  @ %bb.3: @ %do.body
+; CHECK-NEXT:    movs r4, #14
+; CHECK-NEXT:    and.w r2, r4, r2, lsl #1
+; CHECK-NEXT:    vmlaldavax.s16 r6, r5, q1, q0
 ; CHECK-NEXT:    vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT:    vmlsldava.s16 r4, r7, q1, q0
+; CHECK-NEXT:    vmlsldava.s16 r8, r7, q1, q0
 ; CHECK-NEXT:    vldrh.u16 q0, [r1, #-16]
-; CHECK-NEXT:    vmlaldavax.s16 r8, r5, q2, q0
+; CHECK-NEXT:    vmlaldavax.s16 r6, r5, q2, q0
 ; CHECK-NEXT:    vctp.16 r2
-; CHECK-NEXT:    vmlsldava.s16 r4, r7, q2, q0
+; CHECK-NEXT:    vmlsldava.s16 r8, r7, q2, q0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vldrht.u16 q1, [r0]
 ; CHECK-NEXT:    cmp r2, #9
 ; CHECK-NEXT:    vpsttt
 ; CHECK-NEXT:    vldrht.u16 q0, [r1]
-; CHECK-NEXT:    vmlsldavat.s16 r4, r7, q1, q0
-; CHECK-NEXT:    vmlaldavaxt.s16 r8, r5, q1, q0
+; CHECK-NEXT:    vmlsldavat.s16 r8, r7, q1, q0
+; CHECK-NEXT:    vmlaldavaxt.s16 r6, r5, q1, q0
 ; CHECK-NEXT:    blo .LBB0_10
 ; CHECK-NEXT:  @ %bb.4: @ %do.body.1
 ; CHECK-NEXT:    subs r2, #8
@@ -69,17 +58,17 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
 ; CHECK-NEXT:    vpstttt
 ; CHECK-NEXT:    vldrht.u16 q0, [r0, #16]
 ; CHECK-NEXT:    vldrht.u16 q1, [r1, #16]
-; CHECK-NEXT:    vmlsldavat.s16 r4, r7, q0, q1
-; CHECK-NEXT:    vmlaldavaxt.s16 r8, r5, q0, q1
+; CHECK-NEXT:    vmlsldavat.s16 r8, r7, q0, q1
+; CHECK-NEXT:    vmlaldavaxt.s16 r6, r5, q0, q1
 ; CHECK-NEXT:    b .LBB0_10
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  .LBB0_5: @ %if.else
-; CHECK-NEXT:    mov.w r4, #0
+; CHECK-NEXT:    mov.w r8, #0
 ; CHECK-NEXT:    cbz r2, .LBB0_9
 ; CHECK-NEXT:  @ %bb.6: @ %while.body14.preheader
 ; CHECK-NEXT:    lsls r6, r2, #1
-; CHECK-NEXT:    mov r5, r4
-; CHECK-NEXT:    mov r7, r4
+; CHECK-NEXT:    mov r5, r8
+; CHECK-NEXT:    mov r7, r8
 ; CHECK-NEXT:    movs r2, #0
 ; CHECK-NEXT:    dlstp.16 lr, r6
 ; CHECK-NEXT:    .p2align 2
@@ -88,22 +77,22 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
 ; CHECK-NEXT:    vldrh.u16 q0, [r0], #16
 ; CHECK-NEXT:    vldrh.u16 q1, [r1], #16
 ; CHECK-NEXT:    vmlsldava.s16 r2, r7, q0, q1
-; CHECK-NEXT:    vmlaldavax.s16 r4, r5, q0, q1
+; CHECK-NEXT:    vmlaldavax.s16 r8, r5, q0, q1
 ; CHECK-NEXT:    letp lr, .LBB0_7
 ; CHECK-NEXT:  @ %bb.8: @ %if.end.loopexit177
-; CHECK-NEXT:    mov r8, r4
-; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    mov r6, r8
+; CHECK-NEXT:    mov r8, r2
 ; CHECK-NEXT:    b .LBB0_10
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  .LBB0_9:
-; CHECK-NEXT:    mov r7, r4
-; CHECK-NEXT:    mov.w r8, #0
-; CHECK-NEXT:    mov r5, r4
+; CHECK-NEXT:    mov r7, r8
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r5, r8
 ; CHECK-NEXT:  .LBB0_10: @ %if.end
-; CHECK-NEXT:    asrl r4, r7, #6
-; CHECK-NEXT:    asrl r8, r5, #6
-; CHECK-NEXT:    str r4, [r3]
-; CHECK-NEXT:    str.w r8, [r12]
+; CHECK-NEXT:    asrl r8, r7, #6
+; CHECK-NEXT:    asrl r6, r5, #6
+; CHECK-NEXT:    str.w r8, [r3]
+; CHECK-NEXT:    str.w r6, [r12]
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %cmp = icmp ugt i32 %numSamples, 15

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for separating this out. ARM has a lot of nested register classes with overlapping registers, and the register pressure calculations for FP have never been very accurate. I always wanted to exclude certain register classes (like the one causing problems here), but doing that has never led to performance improvements overall. The register pressures looks somewhat like a random number generator to be honest, and the TRI version can be closer to reality in some places. We might have to make something that helps this test case for the time being and looks into the wider problem of fp reg-pressure later on.

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. LGTM

@wangpc-pp wangpc-pp merged commit f421a7a into llvm:main Dec 20, 2024
8 checks passed
@wangpc-pp wangpc-pp deleted the main-reg-pressure-set-arm branch December 20, 2024 03:43
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants