@@ -282,21 +282,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
282282; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
283283; GFX9-O0-NEXT: v_mov_b32_e32 v16, v1
284284; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
285- ; GFX9-O0-NEXT: v_mov_b32_e32 v5 , v16
285+ ; GFX9-O0-NEXT: v_mov_b32_e32 v1 , v16
286286; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
287- ; GFX9-O0-NEXT: v_mov_b32_e32 v1 , v14
287+ ; GFX9-O0-NEXT: v_mov_b32_e32 v5 , v14
288288; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, v9, v4
289- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
290- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291289; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
290+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
292292; GFX9-O0-NEXT: ; implicit-def: $sgpr4
293293; GFX9-O0-NEXT: ; implicit-def: $sgpr4
294- ; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
295- ; GFX9-O0-NEXT: v_mov_b32_e32 v10 , v5
294+ ; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
295+ ; GFX9-O0-NEXT: v_mov_b32_e32 v14 , v5
296296; GFX9-O0-NEXT: ; implicit-def: $sgpr4
297297; GFX9-O0-NEXT: ; implicit-def: $sgpr4
298- ; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
299- ; GFX9-O0-NEXT: v_mov_b32_e32 v14 , v1
298+ ; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
299+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10 , v1
300300; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
301301; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
302302; GFX9-O0-NEXT: v_xor_b32_e64 v1, v5, v1
@@ -312,21 +312,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
312312; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
313313; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
314314; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
315- ; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
316- ; GFX9-O0-NEXT: v_mov_b32_e32 v7 , v11
315+ ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
316+ ; GFX9-O0-NEXT: v_mov_b32_e32 v8 , v11
317317; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
318318; GFX9-O0-NEXT: v_sub_co_u32_e32 v1, vcc, v1, v3
319- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v8 , vcc, v8 , v5, vcc
320- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v7 , v3, vcc
321- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7 , vcc, v2, v5, vcc
319+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7 , vcc, v7 , v5, vcc
320+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8 , v3, vcc
321+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v2 , vcc, v2, v5, vcc
322322; GFX9-O0-NEXT: ; implicit-def: $sgpr4
323323; GFX9-O0-NEXT: ; implicit-def: $sgpr4
324- ; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
325- ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
324+ ; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
325+ ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
326326; GFX9-O0-NEXT: ; implicit-def: $sgpr4
327327; GFX9-O0-NEXT: ; implicit-def: $sgpr4
328- ; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
329- ; GFX9-O0-NEXT: v_mov_b32_e32 v12 , v7
328+ ; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
329+ ; GFX9-O0-NEXT: v_mov_b32_e32 v2 , v7
330330; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, v6
331331; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v4
332332; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
@@ -339,26 +339,18 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
339339; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
340340; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
341341; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
342- ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
343- ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
344- ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
342+ ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
345343; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
346- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
347- ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
348- ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
349- ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
344+ ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
345+ ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
350346; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
351- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
352- ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
353- ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
354- ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
347+ ; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
348+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
355349; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
356- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
357- ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
358- ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
359- ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
350+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
351+ ; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
360352; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
361- ; GFX9-O0-NEXT: buffer_store_dword v4 , off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
353+ ; GFX9-O0-NEXT: buffer_store_dword v14 , off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
362354; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
363355; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
364356; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
@@ -411,8 +403,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
411403; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
412404; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
413405; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
414- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
415- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
406+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[6:7]
416407; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
417408; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
418409; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
@@ -448,8 +439,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
448439; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
449440; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
450441; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
451- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
452- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
442+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[6:7]
453443; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
454444; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
455445; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
@@ -700,10 +690,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
700690; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
701691; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
702692; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
703- ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
704- ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
705- ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
706- ; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
693+ ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
694+ ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
695+ ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
696+ ; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
707697; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
708698; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
709699; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
@@ -913,14 +903,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
913903; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
914904; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
915905; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
916- ; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
917- ; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
918- ; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
919- ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
920- ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
921- ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
922- ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
923- ; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
906+ ; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
907+ ; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
908+ ; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
909+ ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
910+ ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
911+ ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
912+ ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
913+ ; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
924914; GFX9-O0-NEXT: s_waitcnt vmcnt(9)
925915; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
926916; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -1038,10 +1028,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10381028; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
10391029; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
10401030; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
1041- ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1042- ; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1043- ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1044- ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1031+ ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1032+ ; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1033+ ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1034+ ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10451035; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
10461036; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10471037; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
0 commit comments