Skip to content

Commit 631e14f

Browse files
jayfoad authored and David Salinas committed
[AMDGPU] Enable architected SGPRs for GFX12 (llvm#79160)
Change-Id: I48da084bf12a1df7488eacaafed0fc14d2fbd68f
1 parent af647e2 commit 631e14f

File tree

3 files changed: +68 -40 lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

+1
Original file line numberDiff line numberDiff line change
@@ -1497,6 +1497,7 @@ def FeatureISAVersion12 : FeatureSet<
14971497
FeatureWavefrontSize32,
14981498
FeatureShaderCyclesHiLoRegisters,
14991499
FeatureArchitectedFlatScratch,
1500+
FeatureArchitectedSGPRs,
15001501
FeatureAtomicFaddRtnInsts,
15011502
FeatureAtomicFaddNoRtnInsts,
15021503
FeatureAtomicDsPkAdd16Insts,

llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
5454
; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
5555
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 0
5656
; GFX12-NEXT: v_mov_b32_e32 v31, v0
57+
; GFX12-NEXT: s_mov_b32 s12, ttmp9
5758
; GFX12-NEXT: s_mov_b64 s[8:9], 0
5859
; GFX12-NEXT: s_mov_b32 s32, 0
5960
; GFX12-NEXT: s_wait_kmcnt 0x0

llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll

+66 -40
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,28 @@
55
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
66

77
define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
8+
; GFX9-SDAG-LABEL: workgroup_id_x:
9+
; GFX9-SDAG: ; %bb.0:
10+
; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
12+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
13+
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
14+
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
15+
; GFX9-SDAG-NEXT: s_endpgm
816
;
9-
; GFX9-LABEL: workgroup_id_x:
10-
; GFX9: ; %bb.0:
11-
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12-
; GFX9-NEXT: v_mov_b32_e32 v0, ttmp9
13-
; GFX9-NEXT: v_mov_b32_e32 v1, 0
14-
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
15-
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
16-
; GFX9-NEXT: s_endpgm
17+
; GFX9-GISEL-LABEL: workgroup_id_x:
18+
; GFX9-GISEL: ; %bb.0:
19+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
20+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
21+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
22+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
23+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
24+
; GFX9-GISEL-NEXT: s_endpgm
1725
;
1826
; GFX12-SDAG-LABEL: workgroup_id_x:
1927
; GFX12-SDAG: ; %bb.0:
2028
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
21-
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
29+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
2230
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
2331
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
2432
; GFX12-SDAG-NEXT: s_nop 0
@@ -28,7 +36,7 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
2836
; GFX12-GISEL-LABEL: workgroup_id_x:
2937
; GFX12-GISEL: ; %bb.0:
3038
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
31-
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
39+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
3240
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
3341
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
3442
; GFX12-GISEL-NEXT: s_nop 0
@@ -44,23 +52,22 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
4452
; GFX9-LABEL: workgroup_id_xy:
4553
; GFX9: ; %bb.0:
4654
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
47-
; GFX9-NEXT: v_mov_b32_e32 v0, ttmp9
48-
; GFX9-NEXT: v_mov_b32_e32 v2, 0
49-
; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7
55+
; GFX9-NEXT: v_mov_b32_e32 v1, ttmp9
5056
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
51-
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
52-
; GFX9-NEXT: global_store_dword v2, v1, s[2:3]
57+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
58+
; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7
59+
; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
5360
; GFX9-NEXT: s_endpgm
5461
;
5562
; GFX12-LABEL: workgroup_id_xy:
5663
; GFX12: ; %bb.0:
57-
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
58-
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
59-
; GFX12-NEXT: v_mov_b32_e32 v2, s3
64+
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
65+
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
66+
; GFX12-NEXT: v_mov_b32_e32 v2, ttmp7
6067
; GFX12-NEXT: s_wait_kmcnt 0x0
6168
; GFX12-NEXT: s_clause 0x1
62-
; GFX12-NEXT: global_store_b32 v0, v1, s[4:5]
63-
; GFX12-NEXT: global_store_b32 v0, v2, s[6:7]
69+
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
70+
; GFX12-NEXT: global_store_b32 v0, v2, s[2:3]
6471
; GFX12-NEXT: s_nop 0
6572
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
6673
; GFX12-NEXT: s_endpgm
@@ -73,33 +80,52 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
7380
}
7481

7582
define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) {
76-
; GFX9-LABEL: workgroup_id_xyz:
77-
; GFX9: ; %bb.0:
78-
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
79-
; GFX9-NEXT: v_mov_b32_e32 v0, ttmp9
80-
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x10
81-
; GFX9-NEXT: v_mov_b32_e32 v1, 0
82-
; GFX9-NEXT: s_and_b32 s6, ttmp7, 0xffff
83-
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
84-
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
85-
; GFX9-NEXT: v_mov_b32_e32 v0, s6
86-
; GFX9-NEXT: s_lshr_b32 s0, ttmp7, 16
87-
; GFX9-NEXT: global_store_dword v1, v0, s[2:3]
88-
; GFX9-NEXT: v_mov_b32_e32 v0, s0
89-
; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
90-
; GFX9-NEXT: s_endpgm
83+
; GFX9-SDAG-LABEL: workgroup_id_xyz:
84+
; GFX9-SDAG: ; %bb.0:
85+
; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
86+
; GFX9-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
87+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
88+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
89+
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
90+
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
91+
; GFX9-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
92+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
93+
; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
94+
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
95+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
96+
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[6:7]
97+
; GFX9-SDAG-NEXT: s_endpgm
98+
;
99+
; GFX9-GISEL-LABEL: workgroup_id_xyz:
100+
; GFX9-GISEL: ; %bb.0:
101+
; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
102+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
103+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
104+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
105+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
106+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
107+
; GFX9-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
108+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
109+
; GFX9-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
110+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
111+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
112+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[6:7]
113+
; GFX9-GISEL-NEXT: s_endpgm
91114
;
92115
; GFX12-LABEL: workgroup_id_xyz:
93116
; GFX12: ; %bb.0:
94117
; GFX12-NEXT: s_clause 0x1
95-
; GFX12-NEXT: s_load_b128 s[8:11], s[0:1], 0x0
118+
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
96119
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10
97-
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
98-
; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, s4
120+
; GFX12-NEXT: s_and_b32 s2, ttmp7, 0xffff
121+
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
122+
; GFX12-NEXT: s_lshr_b32 s3, ttmp7, 16
123+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
124+
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
99125
; GFX12-NEXT: s_wait_kmcnt 0x0
100126
; GFX12-NEXT: s_clause 0x2
101-
; GFX12-NEXT: global_store_b32 v0, v1, s[8:9]
102-
; GFX12-NEXT: global_store_b32 v0, v2, s[10:11]
127+
; GFX12-NEXT: global_store_b32 v0, v1, s[4:5]
128+
; GFX12-NEXT: global_store_b32 v0, v2, s[6:7]
103129
; GFX12-NEXT: global_store_b32 v0, v3, s[0:1]
104130
; GFX12-NEXT: s_nop 0
105131
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

0 commit comments

Comments
 (0)