@@ -2868,74 +2868,72 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
2868
2868
; GFX12-NEXT: s_wait_samplecnt 0x0
2869
2869
; GFX12-NEXT: s_wait_bvhcnt 0x0
2870
2870
; GFX12-NEXT: s_wait_kmcnt 0x0
2871
- ; GFX12-NEXT: s_clause 0x1b
2871
+ ; GFX12-NEXT: s_clause 0x1f
2872
+ ; GFX12-NEXT: scratch_load_b32 v31, off, s32
2872
2873
; GFX12-NEXT: scratch_load_b32 v33, off, s32 offset:8
2873
2874
; GFX12-NEXT: scratch_load_b32 v32, off, s32 offset:4
2874
2875
; GFX12-NEXT: scratch_load_b32 v35, off, s32 offset:16
2875
2876
; GFX12-NEXT: scratch_load_b32 v34, off, s32 offset:12
2876
- ; GFX12-NEXT: scratch_load_b32 v31, off, s32
2877
- ; GFX12-NEXT: scratch_load_b32 v37, off, s32 offset:120
2878
- ; GFX12-NEXT: scratch_load_b32 v39, off, s32 offset:104
2879
- ; GFX12-NEXT: scratch_load_b32 v49, off, s32 offset:24
2880
- ; GFX12-NEXT: scratch_load_b32 v48, off, s32 offset:20
2881
- ; GFX12-NEXT: scratch_load_b32 v51, off, s32 offset:32
2882
- ; GFX12-NEXT: scratch_load_b32 v50, off, s32 offset:28
2883
- ; GFX12-NEXT: scratch_load_b32 v53, off, s32 offset:40
2884
- ; GFX12-NEXT: scratch_load_b32 v52, off, s32 offset:36
2885
- ; GFX12-NEXT: scratch_load_b32 v55, off, s32 offset:48
2886
- ; GFX12-NEXT: scratch_load_b32 v54, off, s32 offset:44
2887
- ; GFX12-NEXT: scratch_load_b32 v65, off, s32 offset:56
2888
- ; GFX12-NEXT: scratch_load_b32 v64, off, s32 offset:52
2889
- ; GFX12-NEXT: scratch_load_b32 v67, off, s32 offset:64
2890
- ; GFX12-NEXT: scratch_load_b32 v66, off, s32 offset:60
2891
- ; GFX12-NEXT: scratch_load_b32 v69, off, s32 offset:72
2892
- ; GFX12-NEXT: scratch_load_b32 v68, off, s32 offset:68
2893
- ; GFX12-NEXT: scratch_load_b32 v71, off, s32 offset:80
2894
- ; GFX12-NEXT: scratch_load_b32 v70, off, s32 offset:76
2895
- ; GFX12-NEXT: scratch_load_b32 v81, off, s32 offset:88
2896
- ; GFX12-NEXT: scratch_load_b32 v80, off, s32 offset:84
2897
- ; GFX12-NEXT: scratch_load_b32 v83, off, s32 offset:96
2898
- ; GFX12-NEXT: scratch_load_b32 v82, off, s32 offset:92
2899
- ; GFX12-NEXT: scratch_load_b32 v38, off, s32 offset:100
2900
- ; GFX12-NEXT: s_wait_loadcnt 0x1a
2877
+ ; GFX12-NEXT: scratch_load_b32 v37, off, s32 offset:24
2878
+ ; GFX12-NEXT: scratch_load_b32 v36, off, s32 offset:20
2879
+ ; GFX12-NEXT: scratch_load_b32 v39, off, s32 offset:32
2880
+ ; GFX12-NEXT: scratch_load_b32 v38, off, s32 offset:28
2881
+ ; GFX12-NEXT: scratch_load_b32 v49, off, s32 offset:40
2882
+ ; GFX12-NEXT: scratch_load_b32 v48, off, s32 offset:36
2883
+ ; GFX12-NEXT: scratch_load_b32 v51, off, s32 offset:48
2884
+ ; GFX12-NEXT: scratch_load_b32 v50, off, s32 offset:44
2885
+ ; GFX12-NEXT: scratch_load_b32 v53, off, s32 offset:56
2886
+ ; GFX12-NEXT: scratch_load_b32 v52, off, s32 offset:52
2887
+ ; GFX12-NEXT: scratch_load_b32 v55, off, s32 offset:64
2888
+ ; GFX12-NEXT: scratch_load_b32 v54, off, s32 offset:60
2889
+ ; GFX12-NEXT: scratch_load_b32 v65, off, s32 offset:72
2890
+ ; GFX12-NEXT: scratch_load_b32 v64, off, s32 offset:68
2891
+ ; GFX12-NEXT: scratch_load_b32 v67, off, s32 offset:80
2892
+ ; GFX12-NEXT: scratch_load_b32 v66, off, s32 offset:76
2893
+ ; GFX12-NEXT: scratch_load_b32 v69, off, s32 offset:88
2894
+ ; GFX12-NEXT: scratch_load_b32 v68, off, s32 offset:84
2895
+ ; GFX12-NEXT: scratch_load_b32 v71, off, s32 offset:96
2896
+ ; GFX12-NEXT: scratch_load_b32 v70, off, s32 offset:92
2897
+ ; GFX12-NEXT: scratch_load_b32 v81, off, s32 offset:104
2898
+ ; GFX12-NEXT: scratch_load_b32 v80, off, s32 offset:100
2899
+ ; GFX12-NEXT: scratch_load_b32 v83, off, s32 offset:112
2900
+ ; GFX12-NEXT: scratch_load_b32 v82, off, s32 offset:108
2901
+ ; GFX12-NEXT: scratch_load_b32 v85, off, s32 offset:120
2902
+ ; GFX12-NEXT: scratch_load_b32 v84, off, s32 offset:116
2903
+ ; GFX12-NEXT: scratch_load_b32 v87, off, s32 offset:128
2904
+ ; GFX12-NEXT: scratch_load_b32 v86, off, s32 offset:124
2905
+ ; GFX12-NEXT: s_wait_loadcnt 0x1e
2901
2906
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[32:33]
2902
- ; GFX12-NEXT: s_clause 0x2
2903
- ; GFX12-NEXT: scratch_load_b32 v33, off, s32 offset:112
2904
- ; GFX12-NEXT: scratch_load_b32 v32, off, s32 offset:108
2905
- ; GFX12-NEXT: scratch_load_b32 v36, off, s32 offset:116
2906
- ; GFX12-NEXT: s_wait_loadcnt 0x1b
2907
+ ; GFX12-NEXT: s_wait_loadcnt 0x1c
2907
2908
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[34:35]
2908
- ; GFX12-NEXT: s_clause 0x1
2909
- ; GFX12-NEXT: scratch_load_b32 v35, off, s32 offset:128
2910
- ; GFX12-NEXT: scratch_load_b32 v34, off, s32 offset:124
2909
+ ; GFX12-NEXT: s_wait_loadcnt 0x1a
2910
+ ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[36:37]
2911
2911
; GFX12-NEXT: s_wait_loadcnt 0x18
2912
- ; GFX12-NEXT: v_maximum_f64 v[4:5 ], v[4:5 ], v[48:49 ]
2912
+ ; GFX12-NEXT: v_maximum_f64 v[6:7 ], v[6:7 ], v[38:39 ]
2913
2913
; GFX12-NEXT: s_wait_loadcnt 0x16
2914
- ; GFX12-NEXT: v_maximum_f64 v[6:7 ], v[6:7 ], v[50:51 ]
2914
+ ; GFX12-NEXT: v_maximum_f64 v[8:9 ], v[8:9 ], v[48:49 ]
2915
2915
; GFX12-NEXT: s_wait_loadcnt 0x14
2916
- ; GFX12-NEXT: v_maximum_f64 v[8:9 ], v[8:9 ], v[52:53 ]
2916
+ ; GFX12-NEXT: v_maximum_f64 v[10:11 ], v[10:11 ], v[50:51 ]
2917
2917
; GFX12-NEXT: s_wait_loadcnt 0x12
2918
- ; GFX12-NEXT: v_maximum_f64 v[10:11 ], v[10:11 ], v[54:55 ]
2918
+ ; GFX12-NEXT: v_maximum_f64 v[12:13 ], v[12:13 ], v[52:53 ]
2919
2919
; GFX12-NEXT: s_wait_loadcnt 0x10
2920
- ; GFX12-NEXT: v_maximum_f64 v[12:13 ], v[12:13 ], v[64:65 ]
2920
+ ; GFX12-NEXT: v_maximum_f64 v[14:15 ], v[14:15 ], v[54:55 ]
2921
2921
; GFX12-NEXT: s_wait_loadcnt 0xe
2922
- ; GFX12-NEXT: v_maximum_f64 v[14:15 ], v[14:15 ], v[66:67 ]
2922
+ ; GFX12-NEXT: v_maximum_f64 v[16:17 ], v[16:17 ], v[64:65 ]
2923
2923
; GFX12-NEXT: s_wait_loadcnt 0xc
2924
- ; GFX12-NEXT: v_maximum_f64 v[16:17 ], v[16:17 ], v[68:69 ]
2924
+ ; GFX12-NEXT: v_maximum_f64 v[18:19 ], v[18:19 ], v[66:67 ]
2925
2925
; GFX12-NEXT: s_wait_loadcnt 0xa
2926
- ; GFX12-NEXT: v_maximum_f64 v[18:19 ], v[18:19 ], v[70:71 ]
2926
+ ; GFX12-NEXT: v_maximum_f64 v[20:21 ], v[20:21 ], v[68:69 ]
2927
2927
; GFX12-NEXT: s_wait_loadcnt 0x8
2928
- ; GFX12-NEXT: v_maximum_f64 v[20:21 ], v[20:21 ], v[80:81 ]
2928
+ ; GFX12-NEXT: v_maximum_f64 v[22:23 ], v[22:23 ], v[70:71 ]
2929
2929
; GFX12-NEXT: s_wait_loadcnt 0x6
2930
- ; GFX12-NEXT: v_maximum_f64 v[22:23], v[22:23], v[82:83]
2931
- ; GFX12-NEXT: s_wait_loadcnt 0x5
2932
- ; GFX12-NEXT: v_maximum_f64 v[24:25], v[24:25], v[38:39]
2933
- ; GFX12-NEXT: s_wait_loadcnt 0x3
2934
- ; GFX12-NEXT: v_maximum_f64 v[26:27], v[26:27], v[32:33]
2930
+ ; GFX12-NEXT: v_maximum_f64 v[24:25], v[24:25], v[80:81]
2931
+ ; GFX12-NEXT: s_wait_loadcnt 0x4
2932
+ ; GFX12-NEXT: v_maximum_f64 v[26:27], v[26:27], v[82:83]
2935
2933
; GFX12-NEXT: s_wait_loadcnt 0x2
2936
- ; GFX12-NEXT: v_maximum_f64 v[28:29], v[28:29], v[36:37 ]
2934
+ ; GFX12-NEXT: v_maximum_f64 v[28:29], v[28:29], v[84:85 ]
2937
2935
; GFX12-NEXT: s_wait_loadcnt 0x0
2938
- ; GFX12-NEXT: v_maximum_f64 v[30:31], v[30:31], v[34:35 ]
2936
+ ; GFX12-NEXT: v_maximum_f64 v[30:31], v[30:31], v[86:87 ]
2939
2937
; GFX12-NEXT: s_setpc_b64 s[30:31]
2940
2938
%op = call <16 x double > @llvm.maximum.v16f64 (<16 x double > %src0 , <16 x double > %src1 )
2941
2939
ret <16 x double > %op
0 commit comments