@@ -482,28 +482,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
482
482
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
483
483
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
484
484
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
485
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
486
+ ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
487
+ ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
485
488
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
486
489
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
487
490
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
488
491
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
489
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
490
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
491
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
492
- ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
493
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
494
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
495
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
496
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
492
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
497
493
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
498
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
499
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
500
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
494
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
495
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
501
496
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
502
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
503
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
497
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
504
498
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
505
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
506
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
499
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
507
500
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
508
501
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
509
502
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -514,7 +507,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
514
507
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
515
508
; GFX9-O0-NEXT: s_mov_b32 s14, s13
516
509
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
517
- ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
518
510
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
519
511
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
520
512
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -1048,10 +1040,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1048
1040
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1049
1041
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1050
1042
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1051
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1052
- ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1053
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1054
- ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1043
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1044
+ ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1045
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1046
+ ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1055
1047
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1056
1048
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1057
1049
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -2695,28 +2687,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2695
2687
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2696
2688
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2697
2689
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
2690
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2691
+ ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2692
+ ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2698
2693
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2699
2694
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2700
2695
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2701
2696
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2702
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
2703
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2704
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2705
- ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2706
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2707
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
2708
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2709
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2697
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2710
2698
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2711
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2712
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2713
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
2699
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2700
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
2714
2701
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2715
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2716
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
2702
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
2717
2703
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
2718
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2719
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
2704
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
2720
2705
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
2721
2706
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
2722
2707
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -2727,7 +2712,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2727
2712
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
2728
2713
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2729
2714
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
2730
- ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
2731
2715
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2732
2716
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
2733
2717
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -3261,10 +3245,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3261
3245
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3262
3246
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3263
3247
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3264
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3265
- ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3266
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3267
- ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3248
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3249
+ ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3250
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3251
+ ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3268
3252
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3269
3253
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3270
3254
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
0 commit comments