Skip to content

Commit 689c5c4

Browse files
[AMDGPU] Set total VGPRs to 1536 for gfx12 (#96272)
- Use Feature1_5xVGPRs
1 parent 17e51d5 commit 689c5c4

File tree

7 files changed

+1015
-1033
lines changed

7 files changed

+1015
-1033
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1611,7 +1611,8 @@ def FeatureISAVersion12 : FeatureSet<
16111611
FeatureVGPRSingleUseHintInsts,
16121612
FeatureScalarDwordx3Loads,
16131613
FeatureDPPSrc1SGPR,
1614-
FeatureMaxHardClauseLength32]>;
1614+
FeatureMaxHardClauseLength32,
1615+
Feature1_5xVGPRs]>;
16151616

16161617
def FeatureISAVersion12_Generic: FeatureSet<
16171618
!listconcat(FeatureISAVersion12.Features,

llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll

Lines changed: 48 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -2868,74 +2868,72 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
28682868
; GFX12-NEXT: s_wait_samplecnt 0x0
28692869
; GFX12-NEXT: s_wait_bvhcnt 0x0
28702870
; GFX12-NEXT: s_wait_kmcnt 0x0
2871-
; GFX12-NEXT: s_clause 0x1b
2871+
; GFX12-NEXT: s_clause 0x1f
2872+
; GFX12-NEXT: scratch_load_b32 v31, off, s32
28722873
; GFX12-NEXT: scratch_load_b32 v33, off, s32 offset:8
28732874
; GFX12-NEXT: scratch_load_b32 v32, off, s32 offset:4
28742875
; GFX12-NEXT: scratch_load_b32 v35, off, s32 offset:16
28752876
; GFX12-NEXT: scratch_load_b32 v34, off, s32 offset:12
2876-
; GFX12-NEXT: scratch_load_b32 v31, off, s32
2877-
; GFX12-NEXT: scratch_load_b32 v37, off, s32 offset:120
2878-
; GFX12-NEXT: scratch_load_b32 v39, off, s32 offset:104
2879-
; GFX12-NEXT: scratch_load_b32 v49, off, s32 offset:24
2880-
; GFX12-NEXT: scratch_load_b32 v48, off, s32 offset:20
2881-
; GFX12-NEXT: scratch_load_b32 v51, off, s32 offset:32
2882-
; GFX12-NEXT: scratch_load_b32 v50, off, s32 offset:28
2883-
; GFX12-NEXT: scratch_load_b32 v53, off, s32 offset:40
2884-
; GFX12-NEXT: scratch_load_b32 v52, off, s32 offset:36
2885-
; GFX12-NEXT: scratch_load_b32 v55, off, s32 offset:48
2886-
; GFX12-NEXT: scratch_load_b32 v54, off, s32 offset:44
2887-
; GFX12-NEXT: scratch_load_b32 v65, off, s32 offset:56
2888-
; GFX12-NEXT: scratch_load_b32 v64, off, s32 offset:52
2889-
; GFX12-NEXT: scratch_load_b32 v67, off, s32 offset:64
2890-
; GFX12-NEXT: scratch_load_b32 v66, off, s32 offset:60
2891-
; GFX12-NEXT: scratch_load_b32 v69, off, s32 offset:72
2892-
; GFX12-NEXT: scratch_load_b32 v68, off, s32 offset:68
2893-
; GFX12-NEXT: scratch_load_b32 v71, off, s32 offset:80
2894-
; GFX12-NEXT: scratch_load_b32 v70, off, s32 offset:76
2895-
; GFX12-NEXT: scratch_load_b32 v81, off, s32 offset:88
2896-
; GFX12-NEXT: scratch_load_b32 v80, off, s32 offset:84
2897-
; GFX12-NEXT: scratch_load_b32 v83, off, s32 offset:96
2898-
; GFX12-NEXT: scratch_load_b32 v82, off, s32 offset:92
2899-
; GFX12-NEXT: scratch_load_b32 v38, off, s32 offset:100
2900-
; GFX12-NEXT: s_wait_loadcnt 0x1a
2877+
; GFX12-NEXT: scratch_load_b32 v37, off, s32 offset:24
2878+
; GFX12-NEXT: scratch_load_b32 v36, off, s32 offset:20
2879+
; GFX12-NEXT: scratch_load_b32 v39, off, s32 offset:32
2880+
; GFX12-NEXT: scratch_load_b32 v38, off, s32 offset:28
2881+
; GFX12-NEXT: scratch_load_b32 v49, off, s32 offset:40
2882+
; GFX12-NEXT: scratch_load_b32 v48, off, s32 offset:36
2883+
; GFX12-NEXT: scratch_load_b32 v51, off, s32 offset:48
2884+
; GFX12-NEXT: scratch_load_b32 v50, off, s32 offset:44
2885+
; GFX12-NEXT: scratch_load_b32 v53, off, s32 offset:56
2886+
; GFX12-NEXT: scratch_load_b32 v52, off, s32 offset:52
2887+
; GFX12-NEXT: scratch_load_b32 v55, off, s32 offset:64
2888+
; GFX12-NEXT: scratch_load_b32 v54, off, s32 offset:60
2889+
; GFX12-NEXT: scratch_load_b32 v65, off, s32 offset:72
2890+
; GFX12-NEXT: scratch_load_b32 v64, off, s32 offset:68
2891+
; GFX12-NEXT: scratch_load_b32 v67, off, s32 offset:80
2892+
; GFX12-NEXT: scratch_load_b32 v66, off, s32 offset:76
2893+
; GFX12-NEXT: scratch_load_b32 v69, off, s32 offset:88
2894+
; GFX12-NEXT: scratch_load_b32 v68, off, s32 offset:84
2895+
; GFX12-NEXT: scratch_load_b32 v71, off, s32 offset:96
2896+
; GFX12-NEXT: scratch_load_b32 v70, off, s32 offset:92
2897+
; GFX12-NEXT: scratch_load_b32 v81, off, s32 offset:104
2898+
; GFX12-NEXT: scratch_load_b32 v80, off, s32 offset:100
2899+
; GFX12-NEXT: scratch_load_b32 v83, off, s32 offset:112
2900+
; GFX12-NEXT: scratch_load_b32 v82, off, s32 offset:108
2901+
; GFX12-NEXT: scratch_load_b32 v85, off, s32 offset:120
2902+
; GFX12-NEXT: scratch_load_b32 v84, off, s32 offset:116
2903+
; GFX12-NEXT: scratch_load_b32 v87, off, s32 offset:128
2904+
; GFX12-NEXT: scratch_load_b32 v86, off, s32 offset:124
2905+
; GFX12-NEXT: s_wait_loadcnt 0x1e
29012906
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[32:33]
2902-
; GFX12-NEXT: s_clause 0x2
2903-
; GFX12-NEXT: scratch_load_b32 v33, off, s32 offset:112
2904-
; GFX12-NEXT: scratch_load_b32 v32, off, s32 offset:108
2905-
; GFX12-NEXT: scratch_load_b32 v36, off, s32 offset:116
2906-
; GFX12-NEXT: s_wait_loadcnt 0x1b
2907+
; GFX12-NEXT: s_wait_loadcnt 0x1c
29072908
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[34:35]
2908-
; GFX12-NEXT: s_clause 0x1
2909-
; GFX12-NEXT: scratch_load_b32 v35, off, s32 offset:128
2910-
; GFX12-NEXT: scratch_load_b32 v34, off, s32 offset:124
2909+
; GFX12-NEXT: s_wait_loadcnt 0x1a
2910+
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[36:37]
29112911
; GFX12-NEXT: s_wait_loadcnt 0x18
2912-
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[48:49]
2912+
; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[38:39]
29132913
; GFX12-NEXT: s_wait_loadcnt 0x16
2914-
; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[50:51]
2914+
; GFX12-NEXT: v_maximum_f64 v[8:9], v[8:9], v[48:49]
29152915
; GFX12-NEXT: s_wait_loadcnt 0x14
2916-
; GFX12-NEXT: v_maximum_f64 v[8:9], v[8:9], v[52:53]
2916+
; GFX12-NEXT: v_maximum_f64 v[10:11], v[10:11], v[50:51]
29172917
; GFX12-NEXT: s_wait_loadcnt 0x12
2918-
; GFX12-NEXT: v_maximum_f64 v[10:11], v[10:11], v[54:55]
2918+
; GFX12-NEXT: v_maximum_f64 v[12:13], v[12:13], v[52:53]
29192919
; GFX12-NEXT: s_wait_loadcnt 0x10
2920-
; GFX12-NEXT: v_maximum_f64 v[12:13], v[12:13], v[64:65]
2920+
; GFX12-NEXT: v_maximum_f64 v[14:15], v[14:15], v[54:55]
29212921
; GFX12-NEXT: s_wait_loadcnt 0xe
2922-
; GFX12-NEXT: v_maximum_f64 v[14:15], v[14:15], v[66:67]
2922+
; GFX12-NEXT: v_maximum_f64 v[16:17], v[16:17], v[64:65]
29232923
; GFX12-NEXT: s_wait_loadcnt 0xc
2924-
; GFX12-NEXT: v_maximum_f64 v[16:17], v[16:17], v[68:69]
2924+
; GFX12-NEXT: v_maximum_f64 v[18:19], v[18:19], v[66:67]
29252925
; GFX12-NEXT: s_wait_loadcnt 0xa
2926-
; GFX12-NEXT: v_maximum_f64 v[18:19], v[18:19], v[70:71]
2926+
; GFX12-NEXT: v_maximum_f64 v[20:21], v[20:21], v[68:69]
29272927
; GFX12-NEXT: s_wait_loadcnt 0x8
2928-
; GFX12-NEXT: v_maximum_f64 v[20:21], v[20:21], v[80:81]
2928+
; GFX12-NEXT: v_maximum_f64 v[22:23], v[22:23], v[70:71]
29292929
; GFX12-NEXT: s_wait_loadcnt 0x6
2930-
; GFX12-NEXT: v_maximum_f64 v[22:23], v[22:23], v[82:83]
2931-
; GFX12-NEXT: s_wait_loadcnt 0x5
2932-
; GFX12-NEXT: v_maximum_f64 v[24:25], v[24:25], v[38:39]
2933-
; GFX12-NEXT: s_wait_loadcnt 0x3
2934-
; GFX12-NEXT: v_maximum_f64 v[26:27], v[26:27], v[32:33]
2930+
; GFX12-NEXT: v_maximum_f64 v[24:25], v[24:25], v[80:81]
2931+
; GFX12-NEXT: s_wait_loadcnt 0x4
2932+
; GFX12-NEXT: v_maximum_f64 v[26:27], v[26:27], v[82:83]
29352933
; GFX12-NEXT: s_wait_loadcnt 0x2
2936-
; GFX12-NEXT: v_maximum_f64 v[28:29], v[28:29], v[36:37]
2934+
; GFX12-NEXT: v_maximum_f64 v[28:29], v[28:29], v[84:85]
29372935
; GFX12-NEXT: s_wait_loadcnt 0x0
2938-
; GFX12-NEXT: v_maximum_f64 v[30:31], v[30:31], v[34:35]
2936+
; GFX12-NEXT: v_maximum_f64 v[30:31], v[30:31], v[86:87]
29392937
; GFX12-NEXT: s_setpc_b64 s[30:31]
29402938
%op = call <16 x double> @llvm.maximum.v16f64(<16 x double> %src0, <16 x double> %src1)
29412939
ret <16 x double> %op

llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll

Lines changed: 48 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -2868,74 +2868,72 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
28682868
; GFX12-NEXT: s_wait_samplecnt 0x0
28692869
; GFX12-NEXT: s_wait_bvhcnt 0x0
28702870
; GFX12-NEXT: s_wait_kmcnt 0x0
2871-
; GFX12-NEXT: s_clause 0x1b
2871+
; GFX12-NEXT: s_clause 0x1f
2872+
; GFX12-NEXT: scratch_load_b32 v31, off, s32
28722873
; GFX12-NEXT: scratch_load_b32 v33, off, s32 offset:8
28732874
; GFX12-NEXT: scratch_load_b32 v32, off, s32 offset:4
28742875
; GFX12-NEXT: scratch_load_b32 v35, off, s32 offset:16
28752876
; GFX12-NEXT: scratch_load_b32 v34, off, s32 offset:12
2876-
; GFX12-NEXT: scratch_load_b32 v31, off, s32
2877-
; GFX12-NEXT: scratch_load_b32 v37, off, s32 offset:120
2878-
; GFX12-NEXT: scratch_load_b32 v39, off, s32 offset:104
2879-
; GFX12-NEXT: scratch_load_b32 v49, off, s32 offset:24
2880-
; GFX12-NEXT: scratch_load_b32 v48, off, s32 offset:20
2881-
; GFX12-NEXT: scratch_load_b32 v51, off, s32 offset:32
2882-
; GFX12-NEXT: scratch_load_b32 v50, off, s32 offset:28
2883-
; GFX12-NEXT: scratch_load_b32 v53, off, s32 offset:40
2884-
; GFX12-NEXT: scratch_load_b32 v52, off, s32 offset:36
2885-
; GFX12-NEXT: scratch_load_b32 v55, off, s32 offset:48
2886-
; GFX12-NEXT: scratch_load_b32 v54, off, s32 offset:44
2887-
; GFX12-NEXT: scratch_load_b32 v65, off, s32 offset:56
2888-
; GFX12-NEXT: scratch_load_b32 v64, off, s32 offset:52
2889-
; GFX12-NEXT: scratch_load_b32 v67, off, s32 offset:64
2890-
; GFX12-NEXT: scratch_load_b32 v66, off, s32 offset:60
2891-
; GFX12-NEXT: scratch_load_b32 v69, off, s32 offset:72
2892-
; GFX12-NEXT: scratch_load_b32 v68, off, s32 offset:68
2893-
; GFX12-NEXT: scratch_load_b32 v71, off, s32 offset:80
2894-
; GFX12-NEXT: scratch_load_b32 v70, off, s32 offset:76
2895-
; GFX12-NEXT: scratch_load_b32 v81, off, s32 offset:88
2896-
; GFX12-NEXT: scratch_load_b32 v80, off, s32 offset:84
2897-
; GFX12-NEXT: scratch_load_b32 v83, off, s32 offset:96
2898-
; GFX12-NEXT: scratch_load_b32 v82, off, s32 offset:92
2899-
; GFX12-NEXT: scratch_load_b32 v38, off, s32 offset:100
2900-
; GFX12-NEXT: s_wait_loadcnt 0x1a
2877+
; GFX12-NEXT: scratch_load_b32 v37, off, s32 offset:24
2878+
; GFX12-NEXT: scratch_load_b32 v36, off, s32 offset:20
2879+
; GFX12-NEXT: scratch_load_b32 v39, off, s32 offset:32
2880+
; GFX12-NEXT: scratch_load_b32 v38, off, s32 offset:28
2881+
; GFX12-NEXT: scratch_load_b32 v49, off, s32 offset:40
2882+
; GFX12-NEXT: scratch_load_b32 v48, off, s32 offset:36
2883+
; GFX12-NEXT: scratch_load_b32 v51, off, s32 offset:48
2884+
; GFX12-NEXT: scratch_load_b32 v50, off, s32 offset:44
2885+
; GFX12-NEXT: scratch_load_b32 v53, off, s32 offset:56
2886+
; GFX12-NEXT: scratch_load_b32 v52, off, s32 offset:52
2887+
; GFX12-NEXT: scratch_load_b32 v55, off, s32 offset:64
2888+
; GFX12-NEXT: scratch_load_b32 v54, off, s32 offset:60
2889+
; GFX12-NEXT: scratch_load_b32 v65, off, s32 offset:72
2890+
; GFX12-NEXT: scratch_load_b32 v64, off, s32 offset:68
2891+
; GFX12-NEXT: scratch_load_b32 v67, off, s32 offset:80
2892+
; GFX12-NEXT: scratch_load_b32 v66, off, s32 offset:76
2893+
; GFX12-NEXT: scratch_load_b32 v69, off, s32 offset:88
2894+
; GFX12-NEXT: scratch_load_b32 v68, off, s32 offset:84
2895+
; GFX12-NEXT: scratch_load_b32 v71, off, s32 offset:96
2896+
; GFX12-NEXT: scratch_load_b32 v70, off, s32 offset:92
2897+
; GFX12-NEXT: scratch_load_b32 v81, off, s32 offset:104
2898+
; GFX12-NEXT: scratch_load_b32 v80, off, s32 offset:100
2899+
; GFX12-NEXT: scratch_load_b32 v83, off, s32 offset:112
2900+
; GFX12-NEXT: scratch_load_b32 v82, off, s32 offset:108
2901+
; GFX12-NEXT: scratch_load_b32 v85, off, s32 offset:120
2902+
; GFX12-NEXT: scratch_load_b32 v84, off, s32 offset:116
2903+
; GFX12-NEXT: scratch_load_b32 v87, off, s32 offset:128
2904+
; GFX12-NEXT: scratch_load_b32 v86, off, s32 offset:124
2905+
; GFX12-NEXT: s_wait_loadcnt 0x1e
29012906
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[32:33]
2902-
; GFX12-NEXT: s_clause 0x2
2903-
; GFX12-NEXT: scratch_load_b32 v33, off, s32 offset:112
2904-
; GFX12-NEXT: scratch_load_b32 v32, off, s32 offset:108
2905-
; GFX12-NEXT: scratch_load_b32 v36, off, s32 offset:116
2906-
; GFX12-NEXT: s_wait_loadcnt 0x1b
2907+
; GFX12-NEXT: s_wait_loadcnt 0x1c
29072908
; GFX12-NEXT: v_minimum_f64 v[2:3], v[2:3], v[34:35]
2908-
; GFX12-NEXT: s_clause 0x1
2909-
; GFX12-NEXT: scratch_load_b32 v35, off, s32 offset:128
2910-
; GFX12-NEXT: scratch_load_b32 v34, off, s32 offset:124
2909+
; GFX12-NEXT: s_wait_loadcnt 0x1a
2910+
; GFX12-NEXT: v_minimum_f64 v[4:5], v[4:5], v[36:37]
29112911
; GFX12-NEXT: s_wait_loadcnt 0x18
2912-
; GFX12-NEXT: v_minimum_f64 v[4:5], v[4:5], v[48:49]
2912+
; GFX12-NEXT: v_minimum_f64 v[6:7], v[6:7], v[38:39]
29132913
; GFX12-NEXT: s_wait_loadcnt 0x16
2914-
; GFX12-NEXT: v_minimum_f64 v[6:7], v[6:7], v[50:51]
2914+
; GFX12-NEXT: v_minimum_f64 v[8:9], v[8:9], v[48:49]
29152915
; GFX12-NEXT: s_wait_loadcnt 0x14
2916-
; GFX12-NEXT: v_minimum_f64 v[8:9], v[8:9], v[52:53]
2916+
; GFX12-NEXT: v_minimum_f64 v[10:11], v[10:11], v[50:51]
29172917
; GFX12-NEXT: s_wait_loadcnt 0x12
2918-
; GFX12-NEXT: v_minimum_f64 v[10:11], v[10:11], v[54:55]
2918+
; GFX12-NEXT: v_minimum_f64 v[12:13], v[12:13], v[52:53]
29192919
; GFX12-NEXT: s_wait_loadcnt 0x10
2920-
; GFX12-NEXT: v_minimum_f64 v[12:13], v[12:13], v[64:65]
2920+
; GFX12-NEXT: v_minimum_f64 v[14:15], v[14:15], v[54:55]
29212921
; GFX12-NEXT: s_wait_loadcnt 0xe
2922-
; GFX12-NEXT: v_minimum_f64 v[14:15], v[14:15], v[66:67]
2922+
; GFX12-NEXT: v_minimum_f64 v[16:17], v[16:17], v[64:65]
29232923
; GFX12-NEXT: s_wait_loadcnt 0xc
2924-
; GFX12-NEXT: v_minimum_f64 v[16:17], v[16:17], v[68:69]
2924+
; GFX12-NEXT: v_minimum_f64 v[18:19], v[18:19], v[66:67]
29252925
; GFX12-NEXT: s_wait_loadcnt 0xa
2926-
; GFX12-NEXT: v_minimum_f64 v[18:19], v[18:19], v[70:71]
2926+
; GFX12-NEXT: v_minimum_f64 v[20:21], v[20:21], v[68:69]
29272927
; GFX12-NEXT: s_wait_loadcnt 0x8
2928-
; GFX12-NEXT: v_minimum_f64 v[20:21], v[20:21], v[80:81]
2928+
; GFX12-NEXT: v_minimum_f64 v[22:23], v[22:23], v[70:71]
29292929
; GFX12-NEXT: s_wait_loadcnt 0x6
2930-
; GFX12-NEXT: v_minimum_f64 v[22:23], v[22:23], v[82:83]
2931-
; GFX12-NEXT: s_wait_loadcnt 0x5
2932-
; GFX12-NEXT: v_minimum_f64 v[24:25], v[24:25], v[38:39]
2933-
; GFX12-NEXT: s_wait_loadcnt 0x3
2934-
; GFX12-NEXT: v_minimum_f64 v[26:27], v[26:27], v[32:33]
2930+
; GFX12-NEXT: v_minimum_f64 v[24:25], v[24:25], v[80:81]
2931+
; GFX12-NEXT: s_wait_loadcnt 0x4
2932+
; GFX12-NEXT: v_minimum_f64 v[26:27], v[26:27], v[82:83]
29352933
; GFX12-NEXT: s_wait_loadcnt 0x2
2936-
; GFX12-NEXT: v_minimum_f64 v[28:29], v[28:29], v[36:37]
2934+
; GFX12-NEXT: v_minimum_f64 v[28:29], v[28:29], v[84:85]
29372935
; GFX12-NEXT: s_wait_loadcnt 0x0
2938-
; GFX12-NEXT: v_minimum_f64 v[30:31], v[30:31], v[34:35]
2936+
; GFX12-NEXT: v_minimum_f64 v[30:31], v[30:31], v[86:87]
29392937
; GFX12-NEXT: s_setpc_b64 s[30:31]
29402938
%op = call <16 x double> @llvm.minimum.v16f64(<16 x double> %src0, <16 x double> %src1)
29412939
ret <16 x double> %op

0 commit comments

Comments
 (0)