-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AArch64][SME2] Preserve ZT0 state around function calls #78321
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
kmclaughlin-arm
merged 8 commits into
llvm:main
from
kmclaughlin-arm:sme2-shared-za-zt0
Jan 20, 2024
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
11dce21
[SME2][Clang] Add tests with ZT0 state
kmclaughlin-arm eef198b
[AArch64][SME2] Preserve ZT0 state around function calls
kmclaughlin-arm 510f1a6
- Added requiresZAToggle() to AArch64SMEAttributes
kmclaughlin-arm c9f0e35
- Run clang-format
kmclaughlin-arm 4f3a3e9
- Split requiresPreservingZT0 into requiresDisablingZABeforeCall/requ…
kmclaughlin-arm f901ecf
- Reduced number of tests added to SMEAttributesTest & added some wit…
kmclaughlin-arm dca9f00
- Rebased on main
kmclaughlin-arm 5e32ea9
Merge branch 'main' into sme2-shared-za-zt0
kmclaughlin-arm File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s | ||
|
||
; External function called by every test in this file. It is declared without | ||
; SME attributes; each test attaches the callee's attributes at the call site. | ||
declare void @callee(); | ||
|
||
; | ||
; Private-ZA Callee | ||
; | ||
|
||
; Caller has "aarch64_in_zt0"; the call site carries no SME attributes. | ||
; Expect spill & fill of ZT0 around call | ||
; Expect smstop/smstart za around call | ||
; The expected output stores ZT0 to the stack slot addressed by x19 before | ||
; 'smstop za' and reloads it from the same slot after 'smstart za'. | ||
define void @zt0_in_caller_no_state_callee() "aarch64_in_zt0" nounwind { | ||
; CHECK-LABEL: zt0_in_caller_no_state_callee: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: sub sp, sp, #80 | ||
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill | ||
; CHECK-NEXT: mov x19, sp | ||
; CHECK-NEXT: str zt0, [x19] | ||
; CHECK-NEXT: smstop za | ||
; CHECK-NEXT: bl callee | ||
; CHECK-NEXT: smstart za | ||
; CHECK-NEXT: ldr zt0, [x19] | ||
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload | ||
; CHECK-NEXT: add sp, sp, #80 | ||
; CHECK-NEXT: ret | ||
call void @callee(); | ||
ret void; | ||
} | ||
|
||
; Caller has "aarch64_pstate_za_shared" and "aarch64_in_zt0"; the call site | ||
; carries no SME attributes. | ||
; Expect spill & fill of ZT0 around call | ||
; Expect setup and restore lazy-save around call | ||
; Expect smstart za after call | ||
; In the expected output the block at x29-16 is filled in and its address is | ||
; written to TPIDR2_EL0 before ZT0 is stored and the call is made; there is no | ||
; 'smstop za' before the call. After the call, ZA is re-enabled, ZT0 is | ||
; reloaded, and __arm_tpidr2_restore is called only when TPIDR2_EL0 reads back | ||
; as zero (the cbnz skips it otherwise). TPIDR2_EL0 is then cleared. | ||
define void @za_zt0_shared_caller_no_state_callee() "aarch64_pstate_za_shared" "aarch64_in_zt0" nounwind { | ||
; CHECK-LABEL: za_zt0_shared_caller_no_state_callee: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill | ||
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill | ||
; CHECK-NEXT: mov x29, sp | ||
; CHECK-NEXT: sub sp, sp, #80 | ||
; CHECK-NEXT: rdsvl x8, #1 | ||
; CHECK-NEXT: mov x9, sp | ||
; CHECK-NEXT: msub x9, x8, x8, x9 | ||
; CHECK-NEXT: mov sp, x9 | ||
; CHECK-NEXT: sub x10, x29, #16 | ||
; CHECK-NEXT: sub x19, x29, #80 | ||
; CHECK-NEXT: stur wzr, [x29, #-4] | ||
; CHECK-NEXT: sturh wzr, [x29, #-6] | ||
; CHECK-NEXT: stur x9, [x29, #-16] | ||
; CHECK-NEXT: sturh w8, [x29, #-8] | ||
; CHECK-NEXT: msr TPIDR2_EL0, x10 | ||
; CHECK-NEXT: str zt0, [x19] | ||
; CHECK-NEXT: bl callee | ||
; CHECK-NEXT: smstart za | ||
; CHECK-NEXT: ldr zt0, [x19] | ||
; CHECK-NEXT: mrs x8, TPIDR2_EL0 | ||
; CHECK-NEXT: sub x0, x29, #16 | ||
; CHECK-NEXT: cbnz x8, .LBB1_2 | ||
; CHECK-NEXT: // %bb.1: | ||
; CHECK-NEXT: bl __arm_tpidr2_restore | ||
; CHECK-NEXT: .LBB1_2: | ||
; CHECK-NEXT: msr TPIDR2_EL0, xzr | ||
; CHECK-NEXT: mov sp, x29 | ||
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload | ||
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload | ||
; CHECK-NEXT: ret | ||
call void @callee(); | ||
ret void; | ||
} | ||
|
||
; | ||
; Shared-ZA Callee | ||
; | ||
|
||
; Both the caller and the call site have "aarch64_in_zt0". | ||
; Caller and callee have shared ZT0 state, no spill/fill of ZT0 required | ||
; The expected output only saves and restores x30 around the call: there is no | ||
; ZT0 store/load and no smstop/smstart. | ||
define void @zt0_shared_caller_zt0_shared_callee() "aarch64_in_zt0" nounwind { | ||
; CHECK-LABEL: zt0_shared_caller_zt0_shared_callee: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill | ||
; CHECK-NEXT: bl callee | ||
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload | ||
; CHECK-NEXT: ret | ||
call void @callee() "aarch64_in_zt0"; | ||
ret void; | ||
} | ||
|
||
; Caller has "aarch64_pstate_za_shared" and "aarch64_in_zt0"; the call site has | ||
; only "aarch64_pstate_za_shared", so ZT0 is not shared with the callee. | ||
; Expect spill & fill of ZT0 around call | ||
; The expected output stores ZT0 to [x19] before the call and reloads it | ||
; afterwards, with no smstop/smstart za and no write to TPIDR2_EL0. | ||
define void @za_zt0_shared_caller_za_shared_callee() "aarch64_pstate_za_shared" "aarch64_in_zt0" nounwind { | ||
; CHECK-LABEL: za_zt0_shared_caller_za_shared_callee: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill | ||
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill | ||
; CHECK-NEXT: mov x29, sp | ||
; CHECK-NEXT: sub sp, sp, #80 | ||
; CHECK-NEXT: rdsvl x8, #1 | ||
; CHECK-NEXT: mov x9, sp | ||
; CHECK-NEXT: msub x8, x8, x8, x9 | ||
; CHECK-NEXT: mov sp, x8 | ||
; CHECK-NEXT: sub x19, x29, #80 | ||
; CHECK-NEXT: stur wzr, [x29, #-4] | ||
; CHECK-NEXT: sturh wzr, [x29, #-6] | ||
; CHECK-NEXT: stur x8, [x29, #-16] | ||
; CHECK-NEXT: str zt0, [x19] | ||
; CHECK-NEXT: bl callee | ||
; CHECK-NEXT: ldr zt0, [x19] | ||
; CHECK-NEXT: mov sp, x29 | ||
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload | ||
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload | ||
; CHECK-NEXT: ret | ||
call void @callee() "aarch64_pstate_za_shared"; | ||
ret void; | ||
} | ||
|
||
; Both the caller and the call site have "aarch64_pstate_za_shared" and | ||
; "aarch64_in_zt0". | ||
; Caller and callee have shared ZA & ZT0 | ||
; The expected output contains no ZT0 store/load, no smstop/smstart za and no | ||
; write to TPIDR2_EL0 around the call. | ||
define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_pstate_za_shared" "aarch64_in_zt0" nounwind { | ||
; CHECK-LABEL: za_zt0_shared_caller_za_zt0_shared_callee: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill | ||
; CHECK-NEXT: mov x29, sp | ||
; CHECK-NEXT: sub sp, sp, #16 | ||
; CHECK-NEXT: rdsvl x8, #1 | ||
; CHECK-NEXT: mov x9, sp | ||
; CHECK-NEXT: msub x8, x8, x8, x9 | ||
; CHECK-NEXT: mov sp, x8 | ||
; CHECK-NEXT: stur wzr, [x29, #-4] | ||
; CHECK-NEXT: sturh wzr, [x29, #-6] | ||
; CHECK-NEXT: stur x8, [x29, #-16] | ||
; CHECK-NEXT: bl callee | ||
; CHECK-NEXT: mov sp, x29 | ||
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload | ||
; CHECK-NEXT: ret | ||
call void @callee() "aarch64_pstate_za_shared" "aarch64_in_zt0"; | ||
ret void; | ||
} | ||
|
||
; | ||
; New-ZA Callee (call site has "aarch64_new_zt0") | ||
; | ||
|
||
; Caller has "aarch64_in_zt0"; the call site has "aarch64_new_zt0". | ||
; Expect spill & fill of ZT0 around call | ||
; Expect smstop/smstart za around call | ||
; The expected output stores ZT0 to the stack slot addressed by x19 before | ||
; 'smstop za' and reloads it from the same slot after 'smstart za'. | ||
define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind { | ||
; CHECK-LABEL: zt0_in_caller_zt0_new_callee: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: sub sp, sp, #80 | ||
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill | ||
; CHECK-NEXT: mov x19, sp | ||
; CHECK-NEXT: str zt0, [x19] | ||
; CHECK-NEXT: smstop za | ||
; CHECK-NEXT: bl callee | ||
; CHECK-NEXT: smstart za | ||
; CHECK-NEXT: ldr zt0, [x19] | ||
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload | ||
; CHECK-NEXT: add sp, sp, #80 | ||
; CHECK-NEXT: ret | ||
call void @callee() "aarch64_new_zt0"; | ||
ret void; | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.