@@ -466,28 +466,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
466
466
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
467
467
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
468
468
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
469
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
470
+ ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
471
+ ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
469
472
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
470
473
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
471
474
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
472
475
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
473
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
474
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
475
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
476
- ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
477
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
478
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
479
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
480
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
476
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
481
477
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
482
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
483
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
484
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
478
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
479
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
485
480
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
486
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
487
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
481
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
488
482
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
489
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
490
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
483
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
491
484
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
492
485
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
493
486
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -498,7 +491,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
498
491
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
499
492
; GFX9-O0-NEXT: s_mov_b32 s14, s13
500
493
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
501
- ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
502
494
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
503
495
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
504
496
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -1032,10 +1024,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1032
1024
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1033
1025
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1034
1026
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1035
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1036
- ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1037
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1038
- ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1027
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1028
+ ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1029
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1030
+ ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1039
1031
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1040
1032
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1041
1033
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -2737,28 +2729,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2737
2729
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2738
2730
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2739
2731
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
2732
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2733
+ ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2734
+ ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2740
2735
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2741
2736
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2742
2737
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2743
2738
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2744
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
2745
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2746
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2747
- ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2748
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2749
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
2750
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2751
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2739
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2752
2740
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2753
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2754
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2755
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
2741
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2742
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
2756
2743
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2757
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2758
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
2744
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
2759
2745
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
2760
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2761
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
2746
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
2762
2747
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
2763
2748
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
2764
2749
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -2769,7 +2754,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2769
2754
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
2770
2755
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2771
2756
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
2772
- ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
2773
2757
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2774
2758
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
2775
2759
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -3303,10 +3287,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3303
3287
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3304
3288
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3305
3289
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3306
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3307
- ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3308
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3309
- ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3290
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3291
+ ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3292
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3293
+ ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3310
3294
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3311
3295
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3312
3296
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
0 commit comments