Skip to content

Commit b59b8d4

Browse files
authored
[AMDGPU] Add GFX12 S_WAIT_* instructions (#77336)
GFX12 has separate wait instructions per counter, e.g. S_WAIT_LOADCNT. S_WAITCNT still exists but is deprecated, and codegen should stop using it. The S_WAITCNT_* instructions (e.g. S_WAITCNT_VSCNT) are removed. This patch adds MC layer support for the new S_WAIT_* instructions and removes MC layer support for the S_WAITCNT_* instructions on GFX12.
1 parent ae5575d commit b59b8d4

File tree

6 files changed

+164
-7
lines changed

6 files changed

+164
-7
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,14 +1196,12 @@ let SubtargetPredicate = isGFX10Plus in {
11961196
let SubtargetPredicate = isGFX10GFX11 in {
11971197
def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
11981198
def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">;
1199-
} // End SubtargetPredicate = isGFX10GFX11
12001199

1201-
let SubtargetPredicate = isGFX10Plus in {
12021200
def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
12031201
def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
12041202
def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">;
12051203
def S_WAITCNT_LGKMCNT : SOPK_WAITCNT<"s_waitcnt_lgkmcnt">;
1206-
} // End SubtargetPredicate = isGFX10Plus
1204+
} // End SubtargetPredicate = isGFX10GFX11
12071205

12081206
//===----------------------------------------------------------------------===//
12091207
// SOPC Instructions
@@ -1712,6 +1710,27 @@ let SubtargetPredicate = HasVGPRSingleUseHintInsts in {
17121710
SOPP_Pseudo<"s_singleuse_vdst", (ins s16imm:$simm16), "$simm16">;
17131711
} // End SubtargetPredicate = HasVGPRSingleUseHintInsts
17141712

1713+
let SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 in {
1714+
def S_WAIT_LOADCNT :
1715+
SOPP_Pseudo<"s_wait_loadcnt", (ins s16imm:$simm16), "$simm16">;
1716+
def S_WAIT_LOADCNT_DSCNT :
1717+
SOPP_Pseudo<"s_wait_loadcnt_dscnt", (ins s16imm:$simm16), "$simm16">;
1718+
def S_WAIT_STORECNT :
1719+
SOPP_Pseudo<"s_wait_storecnt", (ins s16imm:$simm16), "$simm16">;
1720+
def S_WAIT_STORECNT_DSCNT :
1721+
SOPP_Pseudo<"s_wait_storecnt_dscnt", (ins s16imm:$simm16), "$simm16">;
1722+
def S_WAIT_SAMPLECNT :
1723+
SOPP_Pseudo<"s_wait_samplecnt", (ins s16imm:$simm16), "$simm16">;
1724+
def S_WAIT_BVHCNT :
1725+
SOPP_Pseudo<"s_wait_bvhcnt", (ins s16imm:$simm16), "$simm16">;
1726+
def S_WAIT_EXPCNT :
1727+
SOPP_Pseudo<"s_wait_expcnt", (ins s16imm:$simm16), "$simm16">;
1728+
def S_WAIT_DSCNT :
1729+
SOPP_Pseudo<"s_wait_dscnt", (ins s16imm:$simm16), "$simm16">;
1730+
def S_WAIT_KMCNT :
1731+
SOPP_Pseudo<"s_wait_kmcnt", (ins s16imm:$simm16), "$simm16">;
1732+
} // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1
1733+
17151734
//===----------------------------------------------------------------------===//
17161735
// SOP1 Patterns
17171736
//===----------------------------------------------------------------------===//
@@ -2421,10 +2440,10 @@ defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx11_gfx12<0x013>;
24212440
defm S_CALL_B64 : SOPK_Real32_gfx11_gfx12<0x014>;
24222441
defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx11<0x016>;
24232442
defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx11<0x017>;
2424-
defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11_gfx12<0x018>;
2425-
defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11_gfx12<0x019>;
2426-
defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11_gfx12<0x01a>;
2427-
defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11_gfx12<0x01b>;
2443+
defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11<0x018>;
2444+
defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11<0x019>;
2445+
defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11<0x01a>;
2446+
defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11<0x01b>;
24282447

24292448
//===----------------------------------------------------------------------===//
24302449
// SOPK - GFX10.
@@ -2526,6 +2545,15 @@ multiclass SOPP_Real_32_Renamed_gfx12<bits<7> op, SOPP_Pseudo backing_pseudo, st
25262545
defm S_WAIT_ALU : SOPP_Real_32_Renamed_gfx12<0x008, S_WAITCNT_DEPCTR, "s_wait_alu">;
25272546
defm S_BARRIER_WAIT : SOPP_Real_32_gfx12<0x014>;
25282547
defm S_BARRIER_LEAVE : SOPP_Real_32_gfx12<0x015>;
2548+
defm S_WAIT_LOADCNT : SOPP_Real_32_gfx12<0x040>;
2549+
defm S_WAIT_STORECNT : SOPP_Real_32_gfx12<0x041>;
2550+
defm S_WAIT_SAMPLECNT : SOPP_Real_32_gfx12<0x042>;
2551+
defm S_WAIT_BVHCNT : SOPP_Real_32_gfx12<0x043>;
2552+
defm S_WAIT_EXPCNT : SOPP_Real_32_gfx12<0x044>;
2553+
defm S_WAIT_DSCNT : SOPP_Real_32_gfx12<0x046>;
2554+
defm S_WAIT_KMCNT : SOPP_Real_32_gfx12<0x047>;
2555+
defm S_WAIT_LOADCNT_DSCNT : SOPP_Real_32_gfx12<0x048>;
2556+
defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>;
25292557

25302558
//===----------------------------------------------------------------------===//
25312559
// SOPP - GFX11, GFX12.

llvm/test/MC/AMDGPU/gfx11_asm_err.s

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ v_interp_p2_f32 v0, -v1, v2, v3 wait_exp
3636
global_atomic_cmpswap_x2 v[1:4], v3, v[5:8], off offset:2047 glc
3737
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
3838

39+
// s_waitcnt_depctr is called s_wait_alu on GFX12, but its semantics and
40+
// encoding are identical. Even so, the new name should be rejected on GFX11.
41+
s_wait_alu 0xfffe
42+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
43+
3944
v_cubesc_f32_e64_dpp v5, v1, v2, 12345678 row_shr:4 row_mask:0xf bank_mask:0xf
4045
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
4146

llvm/test/MC/AMDGPU/gfx12_asm_sopp.s

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,59 @@
11
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
22

3+
s_wait_loadcnt 0x1234
4+
// GFX12: encoding: [0x34,0x12,0xc0,0xbf]
5+
6+
s_wait_loadcnt 0xc1d1
7+
// GFX12: encoding: [0xd1,0xc1,0xc0,0xbf]
8+
9+
s_wait_storecnt 0x1234
10+
// GFX12: encoding: [0x34,0x12,0xc1,0xbf]
11+
12+
s_wait_storecnt 0xc1d1
13+
// GFX12: encoding: [0xd1,0xc1,0xc1,0xbf]
14+
15+
s_wait_samplecnt 0x1234
16+
// GFX12: encoding: [0x34,0x12,0xc2,0xbf]
17+
18+
s_wait_samplecnt 0xc1d1
19+
// GFX12: encoding: [0xd1,0xc1,0xc2,0xbf]
20+
21+
s_wait_bvhcnt 0x1234
22+
// GFX12: encoding: [0x34,0x12,0xc3,0xbf]
23+
24+
s_wait_bvhcnt 0xc1d1
25+
// GFX12: encoding: [0xd1,0xc1,0xc3,0xbf]
26+
27+
s_wait_expcnt 0x1234
28+
// GFX12: encoding: [0x34,0x12,0xc4,0xbf]
29+
30+
s_wait_expcnt 0xc1d1
31+
// GFX12: encoding: [0xd1,0xc1,0xc4,0xbf]
32+
33+
s_wait_dscnt 0x1234
34+
// GFX12: encoding: [0x34,0x12,0xc6,0xbf]
35+
36+
s_wait_dscnt 0xc1d1
37+
// GFX12: encoding: [0xd1,0xc1,0xc6,0xbf]
38+
39+
s_wait_kmcnt 0x1234
40+
// GFX12: encoding: [0x34,0x12,0xc7,0xbf]
41+
42+
s_wait_kmcnt 0xc1d1
43+
// GFX12: encoding: [0xd1,0xc1,0xc7,0xbf]
44+
45+
s_wait_loadcnt_dscnt 0x1234
46+
// GFX12: encoding: [0x34,0x12,0xc8,0xbf]
47+
48+
s_wait_loadcnt_dscnt 0xc1d1
49+
// GFX12: encoding: [0xd1,0xc1,0xc8,0xbf]
50+
51+
s_wait_storecnt_dscnt 0x1234
52+
// GFX12: encoding: [0x34,0x12,0xc9,0xbf]
53+
54+
s_wait_storecnt_dscnt 0xc1d1
55+
// GFX12: encoding: [0xd1,0xc1,0xc9,0xbf]
56+
357
s_wait_alu 0xfffe
458
// GFX12: encoding: [0xfe,0xff,0x88,0xbf]
559

llvm/test/MC/AMDGPU/gfx12_unsupported.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,18 @@
44
// Unsupported instructions.
55
//===----------------------------------------------------------------------===//
66

7+
s_waitcnt_expcnt exec_hi, 0x1234
8+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
9+
10+
s_waitcnt_lgkmcnt exec_hi, 0x1234
11+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
12+
13+
s_waitcnt_vmcnt exec_hi, 0x1234
14+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
15+
16+
s_waitcnt_vscnt exec_hi, 0x1234
17+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
18+
719
s_subvector_loop_begin s0, 0x1234
820
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
921

llvm/test/MC/Disassembler/AMDGPU/decode-err.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
# GFX11: [[@LINE+1]]:1: warning: invalid instruction encoding
1111
0x34,0x12,0x93,0xbf
1212

13+
# This is s_waitcnt_vscnt exec_hi, 0x1234, which is valid on GFX11 but not on GFX12.
14+
# GFX12: [[@LINE+1]]:1: warning: invalid instruction encoding
15+
0x34,0x12,0x7f,0xbc
16+
1317
# W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
1418
# W64: [[@LINE+1]]:1: warning: invalid instruction encoding
1519
0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,60 @@
66
# GFX12: s_wait_alu 0xfffe ; encoding: [0xfe,0xff,0x88,0xbf]
77
0xfe,0xff,0x88,0xbf
88

9+
# GFX12: s_wait_loadcnt 0x1234 ; encoding: [0x34,0x12,0xc0,0xbf]
10+
0x34,0x12,0xc0,0xbf
11+
12+
# GFX12: s_wait_loadcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc0,0xbf]
13+
0xd1,0xc1,0xc0,0xbf
14+
15+
# GFX12: s_wait_storecnt 0x1234 ; encoding: [0x34,0x12,0xc1,0xbf]
16+
0x34,0x12,0xc1,0xbf
17+
18+
# GFX12: s_wait_storecnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc1,0xbf]
19+
0xd1,0xc1,0xc1,0xbf
20+
21+
# GFX12: s_wait_samplecnt 0x1234 ; encoding: [0x34,0x12,0xc2,0xbf]
22+
0x34,0x12,0xc2,0xbf
23+
24+
# GFX12: s_wait_samplecnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc2,0xbf]
25+
0xd1,0xc1,0xc2,0xbf
26+
27+
# GFX12: s_wait_bvhcnt 0x1234 ; encoding: [0x34,0x12,0xc3,0xbf]
28+
0x34,0x12,0xc3,0xbf
29+
30+
# GFX12: s_wait_bvhcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc3,0xbf]
31+
0xd1,0xc1,0xc3,0xbf
32+
33+
# GFX12: s_wait_expcnt 0x1234 ; encoding: [0x34,0x12,0xc4,0xbf]
34+
0x34,0x12,0xc4,0xbf
35+
36+
# GFX12: s_wait_expcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc4,0xbf]
37+
0xd1,0xc1,0xc4,0xbf
38+
39+
# GFX12: s_wait_dscnt 0x1234 ; encoding: [0x34,0x12,0xc6,0xbf]
40+
0x34,0x12,0xc6,0xbf
41+
42+
# GFX12: s_wait_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc6,0xbf]
43+
0xd1,0xc1,0xc6,0xbf
44+
45+
# GFX12: s_wait_kmcnt 0x1234 ; encoding: [0x34,0x12,0xc7,0xbf]
46+
0x34,0x12,0xc7,0xbf
47+
48+
# GFX12: s_wait_kmcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc7,0xbf]
49+
0xd1,0xc1,0xc7,0xbf
50+
51+
# GFX12: s_wait_loadcnt_dscnt 0x1234 ; encoding: [0x34,0x12,0xc8,0xbf]
52+
0x34,0x12,0xc8,0xbf
53+
54+
# GFX12: s_wait_loadcnt_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc8,0xbf]
55+
0xd1,0xc1,0xc8,0xbf
56+
57+
# GFX12: s_wait_storecnt_dscnt 0x1234 ; encoding: [0x34,0x12,0xc9,0xbf]
58+
0x34,0x12,0xc9,0xbf
59+
60+
# GFX12: s_wait_storecnt_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc9,0xbf]
61+
0xd1,0xc1,0xc9,0xbf
62+
963
# GFX12: s_singleuse_vdst 0x0 ; encoding: [0x00,0x00,0x93,0xbf]
1064
0x00,0x00,0x93,0xbf
1165

0 commit comments

Comments
 (0)